@@ -4,12 +4,17 @@ import { mkdir, mkdtemp, rm } from 'fs/promises';
44import { join } from 'path' ;
55import { tmpdir } from 'os' ;
66import { existsSync } from 'fs' ;
7+ import type { Snippet as Snippet_ } from 'hume/serialization/resources/tts/types' ;
8+ import type { Hume } from 'hume' ;
9+
/** Snippet as exposed by the SDK's public (camelCase) types, e.g. `generationId`, `utteranceIndex`. */
type Snippet = Hume.tts.Snippet;

/** Snippet in its serialized (snake_case) form, e.g. `generation_id`, `utterance_index`. */
type RawSnippet = Snippet_.Raw;
712
/**
 * Test utility for diagnostic output.
 *
 * Forwards all arguments to `console.log`, but only when the environment
 * variable `BUN_TEST_VERBOSE` is exactly `'1'`; otherwise it does nothing.
 *
 * @param args - Values to print; passed through to `console.log` unchanged.
 */
function log(...args: unknown[]): void {
  // `unknown[]` accepts every call `any[]` did, without turning off type checking.
  if (process.env.BUN_TEST_VERBOSE === '1') {
    console.log(...args);
  }
}
1520
@@ -24,7 +29,7 @@ class TestEnvironment {
2429
2530 async setup ( ) {
2631 await this . server . start ( ) ;
27- this . server . setupDefaultTtsHandler ( ) ;
32+ this . server . setupDefaultTtsStreamHandler ( ) ;
2833 this . apiUrl = this . server . getBaseUrl ( ) ;
2934
3035 // Set up test filesystem
@@ -83,7 +88,7 @@ class TestEnvironment {
8388 * Get the TTS API requests specifically
8489 */
getTtsRequests() {
  // Streaming JSON TTS route. No surrounding whitespace: a path with a trailing
  // space can never equal a request pathname.
  return this.server.findRequestsTo('/v0/tts/stream/json');
}
8893
8994 /**
@@ -195,20 +200,7 @@ class TestEnvironment {
195200}
196201
197202interface MockTtsOptions {
198- generations ?: Array < {
199- generation_id ?: string ;
200- audio ?: string ;
201- duration ?: number ;
202- file_size ?: number ;
203- format ?: { type : string } ;
204- sample_rate ?: number ;
205- encoding ?: {
206- type : string ;
207- format : string ;
208- sample_rate : number ;
209- } ;
210- snippets ?: any [ ] ;
211- } > ;
203+ snippets ?: Array < RawSnippet > ;
212204 error ?: {
213205 status : number ;
214206 message : string ;
@@ -332,13 +324,13 @@ class MockHumeServer {
332324
/**
 * Store the options that drive the mock TTS response, then call
 * `setupDefaultTtsStreamHandler()` so the streaming route is set up.
 *
 * @param options - Snippets to return, or an error to simulate.
 */
configureTtsResponse(options: MockTtsOptions): void {
  this.ttsOptions = options;
  this.setupDefaultTtsStreamHandler();
}
337329
338330 // Default TTS response handler
339- setupDefaultTtsHandler ( ) {
331+ setupDefaultTtsStreamHandler ( ) {
340332 // Handle TTS API requests - actual path used by the client
341- this . addHandler ( '/v0/tts' , async ( req ) => {
333+ this . addHandler ( '/v0/tts/stream/json ' , async ( req ) => {
342334 try {
343335 const body = await req . json ( ) ;
344336
@@ -350,34 +342,28 @@ class MockHumeServer {
350342 } ) ;
351343 }
352344
// Number of default snippets to fabricate when the test configured none.
// NOTE(review): reads the camelCase `numGenerations` key from the request JSON —
// confirm this matches the key the client actually sends on the wire.
const numGenerations = body.numGenerations || 1;

// Prefer snippets supplied through configureTtsResponse(); otherwise build
// one default snippet per requested generation.
const configuredSnippets = this.ttsOptions.snippets;
let snippets: RawSnippet[];
if (configuredSnippets && configuredSnippets.length > 0) {
  snippets = configuredSnippets;
} else {
  const mockAudio = Buffer.from('mock-audio-data').toString('base64');

  // Keys are snake_case throughout, matching the RawSnippet shape that
  // createSnippet() produces (previously `utteranceIndex` was camelCase here).
  snippets = Array.from({ length: numGenerations }, (_, i) => ({
    generation_id: `mock_gen_${i + 1}`,
    audio: mockAudio,
    id: `mock_snippet_${i + 1}`,
    text: 'mock text',
    utterance_index: 0,
  }));
}

// Emit one JSON document per line (newline-delimited), one per snippet.
const payload = snippets.map((snippet) => JSON.stringify(snippet) + '\n').join('');
return new Response(payload, {
  status: 200,
  // `text-plain` is not a valid media type; the type/subtype separator is `/`.
  headers: { 'Content-Type': 'text/plain; charset=utf-8' },
});
381367 } catch ( error ) {
382368 log ( `Error in mock handler: ${ error } ` ) ;
383369 return new Response ( JSON . stringify ( { error : 'Internal server error' } ) , {
@@ -467,24 +453,17 @@ describe('CLI End-to-End Tests', () => {
467453
468454 // Helper functions
/**
 * Build a serialized (snake_case) snippet for the mock TTS stream.
 *
 * Every field is optional and falls back to a default:
 * - `generationId` defaults to `'test_gen_123'`
 * - `id` defaults to `${generationId}-0`
 * - `audio` defaults to the base64 of `audio-data-${generationId}-${id}`
 * - `text` defaults to `'test text'`
 * - `utteranceIndex` defaults to `0`
 *
 * @param partial - camelCase overrides; may be omitted entirely.
 * @returns The snippet in the snake_case shape the mock server streams back.
 */
const createSnippet = (partial: Partial<Snippet> = {}): RawSnippet => {
  const generationId = partial.generationId ?? 'test_gen_123';
  const id = partial.id ?? `${generationId}-0`;
  return {
    generation_id: generationId,
    id,
    // Honor caller-supplied audio instead of silently discarding it.
    audio: partial.audio ?? Buffer.from(`audio-data-${generationId}-${id}`).toString('base64'),
    text: partial.text ?? 'test text',
    utterance_index: partial.utteranceIndex ?? 0,
  };
};
488467
489468 // Helper to check common test failure details
490469 const logFailureDetails = ( result : { exitCode : number ; stdout : string ; stderr : string } ) => {
@@ -511,7 +490,7 @@ describe('CLI End-to-End Tests', () => {
511490 test ( 'Basic text-to-speech with description' , async ( ) => {
512491 // Configure a custom response
513492 testEnv . configureTtsResponse ( {
514- generations : [ createGeneration ( 'test_gen_123' , { duration : 2.5 } ) ] ,
493+ snippets : [ createSnippet ( { generationId : 'test_gen_123' } ) ] ,
515494 } ) ;
516495
517496 const outputDir = await testEnv . createOutputDir ( 'tts-output' ) ;
@@ -545,22 +524,10 @@ describe('CLI End-to-End Tests', () => {
545524 test ( 'Multiple generations with specific format' , async ( ) => {
546525 // Configure a custom response with multiple generations
547526 testEnv . configureTtsResponse ( {
548- generations : [
549- createGeneration ( 'multi_gen_1' , {
550- format : { type : 'mp3' } ,
551- encoding : { type : 'base64' , format : 'mp3' , sample_rate : 44100 } ,
552- duration : 1.0 ,
553- } ) ,
554- createGeneration ( 'multi_gen_2' , {
555- format : { type : 'mp3' } ,
556- encoding : { type : 'base64' , format : 'mp3' , sample_rate : 44100 } ,
557- duration : 1.2 ,
558- } ) ,
559- createGeneration ( 'multi_gen_3' , {
560- format : { type : 'mp3' } ,
561- encoding : { type : 'base64' , format : 'mp3' , sample_rate : 44100 } ,
562- duration : 1.3 ,
563- } ) ,
527+ snippets : [
528+ createSnippet ( { generationId : 'multi_gen_1' } ) ,
529+ createSnippet ( { generationId : 'multi_gen_2' } ) ,
530+ createSnippet ( { generationId : 'multi_gen_3' } ) ,
564531 ] ,
565532 } ) ;
566533
@@ -602,7 +569,7 @@ describe('CLI End-to-End Tests', () => {
602569 test ( 'Reading from stdin' , async ( ) => {
603570 // Configure a custom response
604571 testEnv . configureTtsResponse ( {
605- generations : [ createGeneration ( 'stdin_gen_123' , { duration : 1.8 } ) ] ,
572+ snippets : [ createSnippet ( { generationId : 'stdin_gen_123' } ) ] ,
606573 } ) ;
607574
608575 const inputText = 'This is text from standard input' ;
@@ -716,19 +683,10 @@ describe('CLI End-to-End Tests', () => {
716683
717684 // Configure the TTS responses for first call with 3 generations
718685 testEnv . configureTtsResponse ( {
719- generations : [
720- createGeneration ( 'config_test_gen_1' , {
721- format : { type : 'mp3' } ,
722- encoding : { type : 'base64' , format : 'mp3' , sample_rate : 44100 } ,
723- } ) ,
724- createGeneration ( 'config_test_gen_2' , {
725- format : { type : 'mp3' } ,
726- encoding : { type : 'base64' , format : 'mp3' , sample_rate : 44100 } ,
727- } ) ,
728- createGeneration ( 'config_test_gen_3' , {
729- format : { type : 'mp3' } ,
730- encoding : { type : 'base64' , format : 'mp3' , sample_rate : 44100 } ,
731- } ) ,
686+ snippets : [
687+ createSnippet ( { generationId : 'config_test_gen_1' } ) ,
688+ createSnippet ( { generationId : 'config_test_gen_2' } ) ,
689+ createSnippet ( { generationId : 'config_test_gen_3' } ) ,
732690 ] ,
733691 } ) ;
734692
@@ -771,12 +729,7 @@ describe('CLI End-to-End Tests', () => {
771729
772730 // Configure the TTS response for continuation
773731 testEnv . configureTtsResponse ( {
774- generations : [
775- createGeneration ( 'continuation_gen_1' , {
776- format : { type : 'mp3' } ,
777- encoding : { type : 'base64' , format : 'mp3' , sample_rate : 44100 } ,
778- } ) ,
779- ] ,
732+ snippets : [ createSnippet ( { generationId : 'continuation_gen_1' } ) ] ,
780733 } ) ;
781734
782735 // Step 4: Run TTS with continuation using --last and --last-index
0 commit comments