@@ -116,15 +116,15 @@ <h1>DECtalkMini</h1>
116116 "SIL" : "sil.svg" , "IY" : "ax_i_ii.svg" , "IH" : "ax_i_ii.svg" , "EY" : "ei.svg" , "EH" : "e.svg" ,
117117 "AE" : "ax_i_ii.svg" , "AA" : "a_aa_uh.svg" , "AY" : "a_aa_uh.svg" , "AW" : "a_aa_uh.svg" , "AH" : "a_aa_uh.svg" ,
118118 "AO" : "o.svg" , "OW" : "o.svg" , "OY" : "ei.svg" , "UH" : "a_aa_uh.svg" , "UW" : "o.svg" , "RR" : "n_k_g_ng_y_r.svg" ,
119- "YU" : "n_k_g_ng_y_r .svg" , "AX" : "ax_i_ii.svg" , "IX" : "ax_i_ii.svg" , "IR" : "n_k_g_ng_y_r.svg" ,
120- "ER" : "n_k_g_ng_y_r.svg" , "AR" : "n_k_g_ng_y_r.svg" , "OR" : "o .svg" , "UR" : "w_oo_uu_u .svg" ,
119+ "YU" : "o .svg" , "AX" : "ax_i_ii.svg" , "IX" : "ax_i_ii.svg" , "IR" : "n_k_g_ng_y_r.svg" ,
120+ "ER" : "n_k_g_ng_y_r.svg" , "AR" : "n_k_g_ng_y_r.svg" , "OR" : "n_k_g_ng_y_r .svg" , "UR" : "n_k_g_ng_y_r .svg" ,
121121 "W" : "w_oo_uu_u.svg" , "Y" : "n_k_g_ng_y_r.svg" , "R" : "n_k_g_ng_y_r.svg" , "LL" : "l.svg" ,
122122 "HX" : "ax_i_ii.svg" , "RX" : "n_k_g_ng_y_r.svg" , "LX" : "l.svg" , "M" : "p_b_m.svg" , "N" : "n_k_g_ng_y_r.svg" ,
123123 "NX" : "n_k_g_ng_y_r.svg" , "EL" : "l.svg" , "D_DENTALIZED" : "t_d_s_z.svg" , "EN" : "n_k_g_ng_y_r.svg" ,
124124 "F" : "f_v.svg" , "V" : "f_v.svg" , "TH" : "th_dh.svg" , "DH" : "th_dh.svg" , "S" : "t_d_s_z.svg" ,
125125 "Z" : "t_d_s_z.svg" , "SH" : "sh_zh_ch_jh.svg" , "ZH" : "sh_zh_ch_jh.svg" , "P" : "p_b_m.svg" ,
126126 "B" : "p_b_m.svg" , "T" : "t_d_s_z.svg" , "D" : "t_d_s_z.svg" , "K" : "n_k_g_ng_y_r.svg" ,
127- "G" : "n_k_g_ng_y_r.svg" , "DX" : "t_d_s_z.svg" , "TX" : "t_d_s_z.svg" , "Q" : "sil .svg" ,
127+ "G" : "n_k_g_ng_y_r.svg" , "DX" : "t_d_s_z.svg" , "TX" : "t_d_s_z.svg" , "Q" : "e .svg" ,
128128 "CH" : "sh_zh_ch_jh.svg" , "JH" : "sh_zh_ch_jh.svg" , "DF" : "t_d_s_z.svg"
129129 } ;
130130
@@ -143,15 +143,20 @@ <h1>DECtalkMini</h1>
143143 }
144144
/**
 * Phoneme callback: appends an entry to `phonemeTimeline` whenever the
 * reported phoneme differs from the most recently recorded one.
 *
 * Each recorded entry carries:
 *   - phone:       the raw value passed to this callback
 *   - phonemeName: `phonemes[phoneme]`, or 'UNKNOWN' when that lookup is falsy
 *   - time:        buffer length divided by `currentSampleRate`
 *   - sample:      buffer length as returned by `tts_get_buffer_length()`
 *
 * NOTE(review): presumably the engine invokes this before the phoneme's own
 * samples are appended to the buffer, so the length read here marks where the
 * phoneme starts — confirm against the WASM side.
 *
 * @param {*} phoneme - Phoneme identifier; used as a key into `phonemes`.
 */
window.onPhoneCallback = function (phoneme) {
  // Read the buffer length up front so the timestamp reflects the state on entry.
  const samplePosition = tts_get_buffer_length();
  const timeInSeconds = samplePosition / currentSampleRate;
  const phonemeName = phonemes[phoneme] || 'UNKNOWN';

  // Consecutive repeats of the same phoneme are collapsed into the first entry.
  const entryCount = phonemeTimeline.length;
  if (entryCount > 0 && phonemeTimeline[entryCount - 1].phone === phoneme) {
    return;
  }

  phonemeTimeline.push({
    phone: phoneme,
    phonemeName,
    time: timeInSeconds,
    sample: samplePosition,
  });
};
156161
157162 async function loadWASM ( ) {
@@ -384,23 +389,21 @@ <h1>DECtalkMini</h1>
384389 } ) ;
385390
386391 const duration = currentAudioBuffer . length / currentAudioBuffer . sampleRate ;
387- const fps = 30 ;
388- const frameDuration = 1000 / fps ;
389- const totalFrames = Math . ceil ( duration * fps ) ;
390392
391393 btnExportGif . textContent = 'Rendering...' ;
392394
393- for ( let frame = 0 ; frame < totalFrames ; frame ++ ) {
394- const timeInSeconds = frame / fps ;
395-
396- let currentPhoneme = 'SIL' ;
397- for ( let i = 0 ; i < phonemeTimeline . length ; i ++ ) {
398- if ( phonemeTimeline [ i ] . time <= timeInSeconds ) {
399- currentPhoneme = phonemeTimeline [ i ] . phonemeName ;
400- } else {
401- break ;
402- }
403- }
395+ console . log ( 'Audio length:' , currentAudioBuffer . length , 'samples at' , currentAudioBuffer . sampleRate , 'Hz' ) ;
396+ console . log ( 'Duration:' , duration , 'seconds' ) ;
397+ console . log ( 'Phoneme timeline entries:' , phonemeTimeline . length ) ;
398+ console . log ( 'First phoneme:' , phonemeTimeline [ 0 ] ) ;
399+ console . log ( 'Last phoneme:' , phonemeTimeline [ phonemeTimeline . length - 1 ] ) ;
400+
401+ // One frame per phoneme with exact duration
402+ for ( let i = 0 ; i < phonemeTimeline . length ; i ++ ) {
403+ const currentPhoneme = phonemeTimeline [ i ] . phonemeName ;
404+ const currentTime = phonemeTimeline [ i ] . time ;
405+ const nextTime = i < phonemeTimeline . length - 1 ? phonemeTimeline [ i + 1 ] . time : duration ;
406+ const phonemeDurationMs = Math . round ( ( nextTime - currentTime ) * 1000 ) ;
404407
405408 ctx . fillStyle = '#ffffff' ;
406409 ctx . fillRect ( 0 , 0 , canvas . width , canvas . height ) ;
@@ -419,7 +422,31 @@ <h1>DECtalkMini</h1>
419422 ctx . textAlign = 'center' ;
420423 ctx . fillText ( currentPhoneme , canvas . width / 2 , canvas . height - 30 ) ;
421424
422- gif . addFrame ( ctx , { copy : true , delay : frameDuration } ) ;
425+ gif . addFrame ( ctx , { copy : true , delay : phonemeDurationMs } ) ;
426+ }
427+
428+ // Add 1 second of padding frames (30 frames at 33ms each)
429+ const lastPhoneme = phonemeTimeline [ phonemeTimeline . length - 1 ] . phonemeName ;
430+ const lastImageName = imageMap [ lastPhoneme ] ;
431+
432+ for ( let pad = 0 ; pad < 30 ; pad ++ ) {
433+ ctx . fillStyle = '#ffffff' ;
434+ ctx . fillRect ( 0 , 0 , canvas . width , canvas . height ) ;
435+
436+ if ( lastImageName && preloadedImages [ lastImageName ] ) {
437+ const img = preloadedImages [ lastImageName ] ;
438+ const imgSize = 300 ;
439+ const x = ( canvas . width - imgSize ) / 2 ;
440+ const y = ( canvas . height - imgSize ) / 2 ;
441+ ctx . drawImage ( img , x , y , imgSize , imgSize ) ;
442+ }
443+
444+ ctx . fillStyle = '#000000' ;
445+ ctx . font = 'bold 32px monospace' ;
446+ ctx . textAlign = 'center' ;
447+ ctx . fillText ( lastPhoneme , canvas . width / 2 , canvas . height - 30 ) ;
448+
449+ gif . addFrame ( ctx , { copy : true , delay : 33 } ) ;
423450 }
424451
425452 btnExportGif . textContent = 'Encoding...' ;
0 commit comments