Skip to content

Commit 1c93b05

Browse files
committed
chore(examples): update live transcript for chunked file
1 parent 97d75d3 commit 1c93b05

File tree

1 file changed

+43
-7
lines changed

1 file changed

+43
-7
lines changed

examples/07-transcription-live-websocket.py

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,13 @@
22
Example: Live Transcription with WebSocket (Listen V1)
33
44
This example shows how to stream audio for real-time transcription using WebSocket.
5+
It reads an audio file, chunks it, and sends it as if it were microphone audio.
56
"""
67

8+
import os
9+
import threading
10+
import time
11+
import wave
712
from typing import Union
813

914
from dotenv import load_dotenv
@@ -21,6 +26,16 @@
2126

2227
ListenV1SocketClientResponse = Union[ListenV1Results, ListenV1Metadata, ListenV1UtteranceEnd, ListenV1SpeechStarted]
2328

29+
# Audio file properties (from ffprobe: sample_rate=44100 Hz, mono, PCM s16le)
30+
SAMPLE_RATE = 44100 # Hz
31+
CHANNELS = 1 # mono
32+
SAMPLE_WIDTH = 2 # 16-bit = 2 bytes per sample
33+
34+
# Calculate chunk size in bytes for 100ms of audio (to simulate real-time streaming)
35+
CHUNK_DURATION_MS = 100 # milliseconds
36+
CHUNK_SIZE = int(SAMPLE_RATE * SAMPLE_WIDTH * CHANNELS * (CHUNK_DURATION_MS / 1000.0))
37+
CHUNK_DELAY = CHUNK_DURATION_MS / 1000.0 # Delay in seconds
38+
2439
client = DeepgramClient()
2540

2641
try:
@@ -42,14 +57,35 @@ def on_message(message: ListenV1SocketClientResponse) -> None:
4257
connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
4358
connection.on(EventType.ERROR, lambda error: print(f"Error: {error}"))
4459

45-
# Start listening - this blocks until the connection closes
46-
# In production, you would send audio data here:
47-
# audio_path = os.path.join(os.path.dirname(__file__), "..", "fixtures", "audio.wav")
48-
# with open(audio_path, "rb") as audio_file:
49-
# audio_data = audio_file.read()
50-
# connection.send_listen_v_1_media(audio_data)
60+
# Start listening in a background thread (it blocks until the connection closes)
61+
threading.Thread(target=connection.start_listening, daemon=True).start()
62+
63+
# Wait a moment for the connection to establish
64+
time.sleep(0.5)
65+
66+
# Load audio file and extract raw PCM data
67+
audio_path = os.path.join(os.path.dirname(__file__), "fixtures", "audio.wav")
68+
print(f"Loading audio file: {audio_path}")
69+
70+
with wave.open(audio_path, "rb") as wav_file:
71+
# Read all audio frames as raw PCM data
72+
audio_data = wav_file.readframes(wav_file.getnframes())
73+
74+
print(f"Audio loaded: {len(audio_data)} bytes")
75+
print(f"Sending audio in {CHUNK_DURATION_MS}ms chunks...")
76+
77+
# Send audio in chunks with delays to simulate microphone input
78+
chunk_count = 0
79+
for i in range(0, len(audio_data), CHUNK_SIZE):
80+
chunk = audio_data[i : i + CHUNK_SIZE]
81+
if chunk:
82+
connection.send_listen_v_1_media(chunk)
83+
chunk_count += 1
84+
time.sleep(CHUNK_DELAY)
5185

52-
connection.start_listening()
86+
print(f"Finished sending {chunk_count} chunks")
87+
print("Waiting for final transcription...")
88+
time.sleep(2)
5389

5490
# For async version:
5591
# from deepgram import AsyncDeepgramClient

0 commit comments

Comments
 (0)