chore(ecxamples): update live transcript for chunked file

lukeocodes · lukeocodes · commit 1c93b050e366 · 2025-12-23T16:09:24.000Z
diff --git a/examples/07-transcription-live-websocket.py b/examples/07-transcription-live-websocket.py
@@ -2,8 +2,13 @@
 Example: Live Transcription with WebSocket (Listen V1)
 
 This example shows how to stream audio for real-time transcription using WebSocket.
+It reads an audio file, chunks it, and sends it as if it were microphone audio.
 """
 
+import os
+import threading
+import time
+import wave
 from typing import Union
 
 from dotenv import load_dotenv
@@ -21,6 +26,16 @@
 
 ListenV1SocketClientResponse = Union[ListenV1Results, ListenV1Metadata, ListenV1UtteranceEnd, ListenV1SpeechStarted]
 
+# Audio file properties (from ffprobe: sample_rate=44100 Hz, mono, PCM s16le)
+SAMPLE_RATE = 44100  # Hz
+CHANNELS = 1  # mono
+SAMPLE_WIDTH = 2  # 16-bit = 2 bytes per sample
+
+# Calculate chunk size for 100ms of audio (to simulate real-time streaming)
+CHUNK_DURATION_MS = 100  # milliseconds
+CHUNK_SIZE = int(SAMPLE_RATE * SAMPLE_WIDTH * CHANNELS * (CHUNK_DURATION_MS / 1000.0))
+CHUNK_DELAY = CHUNK_DURATION_MS / 1000.0  # Delay in seconds
+
 client = DeepgramClient()
 
 try:
@@ -42,14 +57,35 @@ def on_message(message: ListenV1SocketClientResponse) -> None:
         connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
         connection.on(EventType.ERROR, lambda error: print(f"Error: {error}"))
 
-        # Start listening - this blocks until the connection closes
-        # In production, you would send audio data here:
-        # audio_path = os.path.join(os.path.dirname(__file__), "..", "fixtures", "audio.wav")
-        # with open(audio_path, "rb") as audio_file:
-        #     audio_data = audio_file.read()
-        #     connection.send_listen_v_1_media(audio_data)
+        # Start listening in a background thread (it blocks until connection closes)
+        threading.Thread(target=connection.start_listening, daemon=True).start()
+
+        # Wait a moment for connection to establish
+        time.sleep(0.5)
+
+        # Load audio file and extract raw PCM data
+        audio_path = os.path.join(os.path.dirname(__file__), "fixtures", "audio.wav")
+        print(f"Loading audio file: {audio_path}")
+
+        with wave.open(audio_path, "rb") as wav_file:
+            # Read all audio frames as raw PCM data
+            audio_data = wav_file.readframes(wav_file.getnframes())
+
+        print(f"Audio loaded: {len(audio_data)} bytes")
+        print(f"Sending audio in {CHUNK_DURATION_MS}ms chunks...")
+
+        # Send audio in chunks with delays to simulate microphone input
+        chunk_count = 0
+        for i in range(0, len(audio_data), CHUNK_SIZE):
+            chunk = audio_data[i : i + CHUNK_SIZE]
+            if chunk:
+                connection.send_listen_v_1_media(chunk)
+                chunk_count += 1
+                time.sleep(CHUNK_DELAY)
 
-        connection.start_listening()
+        print(f"Finished sending {chunk_count} chunks")
+        print("Waiting for final transcription...")
+        time.sleep(2)
 
     # For async version:
     # from deepgram import AsyncDeepgramClient