Skip to content

Commit bcf8c49

Browse files
author
ByteSizedFox
committed
expand wasm buffer and fix alignment on gif export
1 parent 23284b7 commit bcf8c49

File tree

2 files changed

+53
-26
lines changed

2 files changed

+53
-26
lines changed

wasm/index.html

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -116,15 +116,15 @@ <h1>DECtalkMini</h1>
116116
"SIL": "sil.svg", "IY": "ax_i_ii.svg", "IH": "ax_i_ii.svg", "EY": "ei.svg", "EH": "e.svg",
117117
"AE": "ax_i_ii.svg", "AA": "a_aa_uh.svg", "AY": "a_aa_uh.svg", "AW": "a_aa_uh.svg", "AH": "a_aa_uh.svg",
118118
"AO": "o.svg", "OW": "o.svg", "OY": "ei.svg", "UH": "a_aa_uh.svg", "UW": "o.svg", "RR": "n_k_g_ng_y_r.svg",
119-
"YU": "n_k_g_ng_y_r.svg", "AX": "ax_i_ii.svg", "IX": "ax_i_ii.svg", "IR": "n_k_g_ng_y_r.svg",
120-
"ER": "n_k_g_ng_y_r.svg", "AR": "n_k_g_ng_y_r.svg", "OR": "o.svg", "UR": "w_oo_uu_u.svg",
119+
"YU": "o.svg", "AX": "ax_i_ii.svg", "IX": "ax_i_ii.svg", "IR": "n_k_g_ng_y_r.svg",
120+
"ER": "n_k_g_ng_y_r.svg", "AR": "n_k_g_ng_y_r.svg", "OR": "n_k_g_ng_y_r.svg", "UR": "n_k_g_ng_y_r.svg",
121121
"W": "w_oo_uu_u.svg", "Y": "n_k_g_ng_y_r.svg", "R": "n_k_g_ng_y_r.svg", "LL": "l.svg",
122122
"HX": "ax_i_ii.svg", "RX": "n_k_g_ng_y_r.svg", "LX": "l.svg", "M": "p_b_m.svg", "N": "n_k_g_ng_y_r.svg",
123123
"NX": "n_k_g_ng_y_r.svg", "EL": "l.svg", "D_DENTALIZED": "t_d_s_z.svg", "EN": "n_k_g_ng_y_r.svg",
124124
"F": "f_v.svg", "V": "f_v.svg", "TH": "th_dh.svg", "DH": "th_dh.svg", "S": "t_d_s_z.svg",
125125
"Z": "t_d_s_z.svg", "SH": "sh_zh_ch_jh.svg", "ZH": "sh_zh_ch_jh.svg", "P": "p_b_m.svg",
126126
"B": "p_b_m.svg", "T": "t_d_s_z.svg", "D": "t_d_s_z.svg", "K": "n_k_g_ng_y_r.svg",
127-
"G": "n_k_g_ng_y_r.svg", "DX": "t_d_s_z.svg", "TX": "t_d_s_z.svg", "Q": "sil.svg",
127+
"G": "n_k_g_ng_y_r.svg", "DX": "t_d_s_z.svg", "TX": "t_d_s_z.svg", "Q": "e.svg",
128128
"CH": "sh_zh_ch_jh.svg", "JH": "sh_zh_ch_jh.svg", "DF": "t_d_s_z.svg"
129129
};
130130

@@ -143,15 +143,20 @@ <h1>DECtalkMini</h1>
143143
}
144144

145145
window.onPhoneCallback = function(phoneme) {
146-
const currentSampleCount = tts_get_buffer_length();
147-
const timeInSeconds = currentSampleCount / currentSampleRate;
146+
// Use buffer position BEFORE adding new samples for accurate timing
147+
const samplePosition = tts_get_buffer_length();
148+
const timeInSeconds = samplePosition / currentSampleRate;
148149
const phonemeName = phonemes[phoneme] || 'UNKNOWN';
149150

150-
phonemeTimeline.push({
151-
phone: phoneme,
152-
phonemeName: phonemeName,
153-
time: timeInSeconds
154-
});
151+
// Only add if phoneme changed
152+
if (phonemeTimeline.length === 0 || phonemeTimeline[phonemeTimeline.length - 1].phone !== phoneme) {
153+
phonemeTimeline.push({
154+
phone: phoneme,
155+
phonemeName: phonemeName,
156+
time: timeInSeconds,
157+
sample: samplePosition
158+
});
159+
}
155160
};
156161

157162
async function loadWASM() {
@@ -384,23 +389,21 @@ <h1>DECtalkMini</h1>
384389
});
385390

386391
const duration = currentAudioBuffer.length / currentAudioBuffer.sampleRate;
387-
const fps = 30;
388-
const frameDuration = 1000 / fps;
389-
const totalFrames = Math.ceil(duration * fps);
390392

391393
btnExportGif.textContent = 'Rendering...';
392394

393-
for (let frame = 0; frame < totalFrames; frame++) {
394-
const timeInSeconds = frame / fps;
395-
396-
let currentPhoneme = 'SIL';
397-
for (let i = 0; i < phonemeTimeline.length; i++) {
398-
if (phonemeTimeline[i].time <= timeInSeconds) {
399-
currentPhoneme = phonemeTimeline[i].phonemeName;
400-
} else {
401-
break;
402-
}
403-
}
395+
console.log('Audio length:', currentAudioBuffer.length, 'samples at', currentAudioBuffer.sampleRate, 'Hz');
396+
console.log('Duration:', duration, 'seconds');
397+
console.log('Phoneme timeline entries:', phonemeTimeline.length);
398+
console.log('First phoneme:', phonemeTimeline[0]);
399+
console.log('Last phoneme:', phonemeTimeline[phonemeTimeline.length - 1]);
400+
401+
// One frame per phoneme with exact duration
402+
for (let i = 0; i < phonemeTimeline.length; i++) {
403+
const currentPhoneme = phonemeTimeline[i].phonemeName;
404+
const currentTime = phonemeTimeline[i].time;
405+
const nextTime = i < phonemeTimeline.length - 1 ? phonemeTimeline[i + 1].time : duration;
406+
const phonemeDurationMs = Math.round((nextTime - currentTime) * 1000);
404407

405408
ctx.fillStyle = '#ffffff';
406409
ctx.fillRect(0, 0, canvas.width, canvas.height);
@@ -419,7 +422,31 @@ <h1>DECtalkMini</h1>
419422
ctx.textAlign = 'center';
420423
ctx.fillText(currentPhoneme, canvas.width / 2, canvas.height - 30);
421424

422-
gif.addFrame(ctx, {copy: true, delay: frameDuration});
425+
gif.addFrame(ctx, {copy: true, delay: phonemeDurationMs});
426+
}
427+
428+
// Add 1 second of padding frames (30 frames at 33ms each)
429+
const lastPhoneme = phonemeTimeline[phonemeTimeline.length - 1].phonemeName;
430+
const lastImageName = imageMap[lastPhoneme];
431+
432+
for (let pad = 0; pad < 30; pad++) {
433+
ctx.fillStyle = '#ffffff';
434+
ctx.fillRect(0, 0, canvas.width, canvas.height);
435+
436+
if (lastImageName && preloadedImages[lastImageName]) {
437+
const img = preloadedImages[lastImageName];
438+
const imgSize = 300;
439+
const x = (canvas.width - imgSize) / 2;
440+
const y = (canvas.height - imgSize) / 2;
441+
ctx.drawImage(img, x, y, imgSize, imgSize);
442+
}
443+
444+
ctx.fillStyle = '#000000';
445+
ctx.font = 'bold 32px monospace';
446+
ctx.textAlign = 'center';
447+
ctx.fillText(lastPhoneme, canvas.width / 2, canvas.height - 30);
448+
449+
gif.addFrame(ctx, {copy: true, delay: 33});
423450
}
424451

425452
btnExportGif.textContent = 'Encoding...';

wasm/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include <emscripten.h>
44
#include <string.h>
55

6-
#define MAX_BUFFER_SIZE 65536
6+
#define MAX_BUFFER_SIZE (65536 * 20) /* element count of audio_buffer; parenthesized so the macro expands as one value inside larger expressions (e.g. x % MAX_BUFFER_SIZE) */
77
static short audio_buffer[MAX_BUFFER_SIZE];
88
static int buffer_index = 0;
99
static int sample_rate = 11025;

0 commit comments

Comments
 (0)