22Example: Live Transcription with WebSocket (Listen V1)
33
44This example shows how to stream audio for real-time transcription using WebSocket.
5+ It reads an audio file, chunks it, and sends it as if it were microphone audio.
56"""
67
8+ import os
9+ import threading
10+ import time
11+ import wave
712from typing import Union
813
914from dotenv import load_dotenv
2126
2227ListenV1SocketClientResponse = Union [ListenV1Results , ListenV1Metadata , ListenV1UtteranceEnd , ListenV1SpeechStarted ]
2328
29+ # Audio file properties, hard-coded to match the fixture (from ffprobe: sample_rate=44100 Hz, mono, PCM s16le); they are not read from the WAV header below
30+ SAMPLE_RATE = 44100 # Hz
31+ CHANNELS = 1 # mono
32+ SAMPLE_WIDTH = 2 # 16-bit = 2 bytes per sample
33+
34+ # Calculate the chunk size in bytes for CHUNK_DURATION_MS of audio (to simulate real-time streaming)
35+ CHUNK_DURATION_MS = 100 # milliseconds
36+ CHUNK_SIZE = int (SAMPLE_RATE * SAMPLE_WIDTH * CHANNELS * (CHUNK_DURATION_MS / 1000.0 ))
37+ CHUNK_DELAY = CHUNK_DURATION_MS / 1000.0 # Delay in seconds
38+
2439client = DeepgramClient ()
2540
2641try :
@@ -42,14 +57,35 @@ def on_message(message: ListenV1SocketClientResponse) -> None:
4257 connection .on (EventType .CLOSE , lambda _ : print ("Connection closed" ))
4358 connection .on (EventType .ERROR , lambda error : print (f"Error: { error } " ))
4459
45- # Start listening - this blocks until the connection closes
46- # In production, you would send audio data here:
47- # audio_path = os.path.join(os.path.dirname(__file__), "..", "fixtures", "audio.wav")
48- # with open(audio_path, "rb") as audio_file:
49- # audio_data = audio_file.read()
50- # connection.send_listen_v_1_media(audio_data)
60+ # Start listening in a background thread (it blocks until connection closes)
61+ threading .Thread (target = connection .start_listening , daemon = True ).start ()
62+
63+ # Give the connection a moment to establish (fixed delay, not a readiness check)
64+ time .sleep (0.5 )
65+
66+ # Load audio file and extract raw PCM data
67+ audio_path = os .path .join (os .path .dirname (__file__ ), "fixtures" , "audio.wav" )
68+ print (f"Loading audio file: { audio_path } " )
69+
70+ with wave .open (audio_path , "rb" ) as wav_file :
71+ # Read all audio frames as raw PCM data
72+ audio_data = wav_file .readframes (wav_file .getnframes ())
73+
74+ print (f"Audio loaded: { len (audio_data )} bytes" )
75+ print (f"Sending audio in { CHUNK_DURATION_MS } ms chunks..." )
76+
77+ # Send audio in chunks with delays to simulate microphone input
78+ chunk_count = 0
79+ for i in range (0 , len (audio_data ), CHUNK_SIZE ):
80+ chunk = audio_data [i : i + CHUNK_SIZE ]
81+ if chunk :
82+ connection .send_listen_v_1_media (chunk )
83+ chunk_count += 1
84+ time .sleep (CHUNK_DELAY )
5185
52- connection .start_listening ()
86+ print (f"Finished sending { chunk_count } chunks" )
87+ print ("Waiting for final transcription..." )
88+ time .sleep (2 )
5389
5490 # For async version:
5591 # from deepgram import AsyncDeepgramClient
0 commit comments