From 3c51804b5ef80f9797d66c8bad56d06a001450cc Mon Sep 17 00:00:00 2001 From: John Vajda Date: Tue, 22 Jul 2025 08:27:15 -0600 Subject: [PATCH 1/5] feat: add support for agent tags --- .../UnitTests/ClientTests/AgentClientTests.cs | 233 ++++++++++++++++++ Deepgram/Models/Agent/v2/WebSocket/Agent.cs | 7 + 2 files changed, 240 insertions(+) diff --git a/Deepgram.Tests/UnitTests/ClientTests/AgentClientTests.cs b/Deepgram.Tests/UnitTests/ClientTests/AgentClientTests.cs index d788527..f0c0efa 100644 --- a/Deepgram.Tests/UnitTests/ClientTests/AgentClientTests.cs +++ b/Deepgram.Tests/UnitTests/ClientTests/AgentClientTests.cs @@ -389,4 +389,237 @@ public void Agent_Should_Not_Have_MipOptOut_Property() } #endregion + + #region Tags Tests + + [Test] + public void Agent_Tags_Should_Have_Default_Value_Null() + { + // Arrange & Act + var agent = new Agent(); + + // Assert + using (new AssertionScope()) + { + agent.Tags.Should().BeNull(); + } + } + + [Test] + public void Agent_Tags_Should_Be_Settable() + { + // Arrange & Act + var agent = new Agent + { + Tags = new List { "test", "demo", "agent" } + }; + + // Assert + using (new AssertionScope()) + { + agent.Tags.Should().NotBeNull(); + agent.Tags.Should().HaveCount(3); + agent.Tags.Should().Contain("test"); + agent.Tags.Should().Contain("demo"); + agent.Tags.Should().Contain("agent"); + } + } + + [Test] + public void Agent_Tags_Should_Serialize_To_Json_Array() + { + // Arrange + var agent = new Agent + { + Tags = new List { "production", "voice-bot", "customer-service" } + }; + + // Act + var result = agent.ToString(); + + // Assert + using (new AssertionScope()) + { + result.Should().NotBeNull(); + result.Should().Contain("tags"); + result.Should().Contain("["); + result.Should().Contain("]"); + result.Should().Contain("production"); + result.Should().Contain("voice-bot"); + result.Should().Contain("customer-service"); + + // Verify it's valid JSON by parsing it + var parsed = JsonDocument.Parse(result); + var tagsArray = parsed.RootElement.GetProperty("tags"); + tagsArray.ValueKind.Should().Be(JsonValueKind.Array); + tagsArray.GetArrayLength().Should().Be(3); + + var tagsList = new List(); + foreach (var tag in tagsArray.EnumerateArray()) + { + tagsList.Add(tag.GetString()!); + } + tagsList.Should().Contain("production"); + tagsList.Should().Contain("voice-bot"); + tagsList.Should().Contain("customer-service"); + } + } + + [Test] + public void Agent_Tags_Empty_List_Should_Serialize_As_Empty_Array() + { + // Arrange + var agent = new Agent + { + Tags = new List() + }; + + // Act + var result = agent.ToString(); + + // Assert + using (new AssertionScope()) + { + result.Should().NotBeNull(); + result.Should().Contain("tags"); + result.Should().Contain("[]"); + + // Verify it's valid JSON by parsing it + var parsed = JsonDocument.Parse(result); + var tagsArray = parsed.RootElement.GetProperty("tags"); + tagsArray.ValueKind.Should().Be(JsonValueKind.Array); + tagsArray.GetArrayLength().Should().Be(0); + } + } + + [Test] + public void Agent_Tags_Null_Should_Not_Serialize() + { + // Arrange + var agent = new Agent + { + Tags = null + }; + + // Act + var result = agent.ToString(); + + // Assert + using (new AssertionScope()) + { + result.Should().NotBeNull(); + result.Should().NotContain("tags"); + + // Verify it's valid JSON by parsing it + var parsed = JsonDocument.Parse(result); + parsed.RootElement.TryGetProperty("tags", out _).Should().BeFalse(); + } + } + + [Test] + public void Agent_With_Tags_Should_Serialize_With_Other_Properties() + { + // Arrange + var agent = new Agent + { + Language = "en", + Greeting = "Hello, I'm your agent", + Tags = new List { "test-tag", "integration" }, + MipOptOut = true + }; + + // Act + var result = agent.ToString(); + + // Assert + using (new AssertionScope()) + { + result.Should().NotBeNull(); + result.Should().Contain("language"); + result.Should().Contain("greeting"); + result.Should().Contain("tags"); + result.Should().Contain("mip_opt_out"); + + // Verify it's valid JSON by parsing it + var parsed = JsonDocument.Parse(result); + parsed.RootElement.GetProperty("language").GetString().Should().Be("en"); + parsed.RootElement.GetProperty("greeting").GetString().Should().Be("Hello, I'm your agent"); + parsed.RootElement.GetProperty("mip_opt_out").GetBoolean().Should().BeTrue(); + + var tagsArray = parsed.RootElement.GetProperty("tags"); + tagsArray.ValueKind.Should().Be(JsonValueKind.Array); + tagsArray.GetArrayLength().Should().Be(2); + } + } + + [Test] + public void Agent_Tags_Should_Support_Special_Characters() + { + // Arrange + var agent = new Agent + { + Tags = new List { "test-with-dashes", "test_with_underscores", "test with spaces", "test.with.dots" } + }; + + // Act + var result = agent.ToString(); + + // Assert + using (new AssertionScope()) + { + result.Should().NotBeNull(); + + // Verify it's valid JSON by parsing it + var parsed = JsonDocument.Parse(result); + var tagsArray = parsed.RootElement.GetProperty("tags"); + tagsArray.ValueKind.Should().Be(JsonValueKind.Array); + tagsArray.GetArrayLength().Should().Be(4); + + var tagsList = new List(); + foreach (var tag in tagsArray.EnumerateArray()) + { + tagsList.Add(tag.GetString()!); + } + tagsList.Should().Contain("test-with-dashes"); + tagsList.Should().Contain("test_with_underscores"); + tagsList.Should().Contain("test with spaces"); + tagsList.Should().Contain("test.with.dots"); + } + } + + [Test] + public void Agent_Tags_Schema_Should_Match_API_Specification() + { + // Arrange - Test various scenarios as per API specification + var agentWithTags = new Agent { Tags = new List { "search-filter", "analytics", "production" } }; + var agentWithoutTags = new Agent { Tags = null }; + var agentWithEmptyTags = new Agent { Tags = new List() }; + + // Act + var withTagsResult = agentWithTags.ToString(); + var withoutTagsResult = agentWithoutTags.ToString(); + var emptyTagsResult = agentWithEmptyTags.ToString(); + + // Assert + using (new AssertionScope()) + { + // With tags should serialize array + var withTagsParsed = JsonDocument.Parse(withTagsResult); + var tagsArray = withTagsParsed.RootElement.GetProperty("tags"); + tagsArray.ValueKind.Should().Be(JsonValueKind.Array); + tagsArray.GetArrayLength().Should().Be(3); + + // Without tags should not include tags property + var withoutTagsParsed = JsonDocument.Parse(withoutTagsResult); + withoutTagsParsed.RootElement.TryGetProperty("tags", out _).Should().BeFalse(); + + // Empty tags should serialize as empty array + var emptyTagsParsed = JsonDocument.Parse(emptyTagsResult); + var emptyTagsArray = emptyTagsParsed.RootElement.GetProperty("tags"); + emptyTagsArray.ValueKind.Should().Be(JsonValueKind.Array); + emptyTagsArray.GetArrayLength().Should().Be(0); + } + } + + #endregion } \ No newline at end of file diff --git a/Deepgram/Models/Agent/v2/WebSocket/Agent.cs b/Deepgram/Models/Agent/v2/WebSocket/Agent.cs index 0004ee1..47a0b71 100644 --- a/Deepgram/Models/Agent/v2/WebSocket/Agent.cs +++ b/Deepgram/Models/Agent/v2/WebSocket/Agent.cs @@ -25,6 +25,13 @@ public record Agent [JsonPropertyName("speak")] public Speak Speak { get; set; } = new Speak(); + /// + /// Tags to associate with the request. + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("tags")] + public List? Tags { get; set; } + /// /// The message to speak at the start of the connection. /// From fb74f821fc23108f2ebe15341fff14ebab152940 Mon Sep 17 00:00:00 2001 From: John Vajda Date: Fri, 1 Aug 2025 16:19:20 -0600 Subject: [PATCH 2/5] moves agent tags to settings --- .../UnitTests/ClientTests/AgentClientTests.cs | 104 +++++++++++------- Deepgram/Models/Agent/v2/WebSocket/Agent.cs | 7 -- .../Models/Agent/v2/WebSocket/Settings.cs | 7 ++ 3 files changed, 70 insertions(+), 48 deletions(-) diff --git a/Deepgram.Tests/UnitTests/ClientTests/AgentClientTests.cs b/Deepgram.Tests/UnitTests/ClientTests/AgentClientTests.cs index f0c0efa..0e2d1de 100644 --- a/Deepgram.Tests/UnitTests/ClientTests/AgentClientTests.cs +++ b/Deepgram.Tests/UnitTests/ClientTests/AgentClientTests.cs @@ -393,23 +393,23 @@ public void Agent_Should_Not_Have_MipOptOut_Property() #region Tags Tests [Test] - public void Agent_Tags_Should_Have_Default_Value_Null() + public void SettingsSchema_Tags_Should_Have_Default_Value_Null() { // Arrange & Act - var agent = new Agent(); + var settings = new SettingsSchema(); // Assert using (new AssertionScope()) { - agent.Tags.Should().BeNull(); + settings.Tags.Should().BeNull(); } } [Test] - public void Agent_Tags_Should_Be_Settable() + public void SettingsSchema_Tags_Should_Be_Settable() { // Arrange & Act - var agent = new Agent + var settings = new SettingsSchema { Tags = new List { "test", "demo", "agent" } }; @@ -417,25 +417,25 @@ public void Agent_Tags_Should_Be_Settable() // Assert using (new AssertionScope()) { - agent.Tags.Should().NotBeNull(); - agent.Tags.Should().HaveCount(3); - agent.Tags.Should().Contain("test"); - agent.Tags.Should().Contain("demo"); - agent.Tags.Should().Contain("agent"); + settings.Tags.Should().NotBeNull(); + settings.Tags.Should().HaveCount(3); + settings.Tags.Should().Contain("test"); + settings.Tags.Should().Contain("demo"); + settings.Tags.Should().Contain("agent"); } } [Test] - public void Agent_Tags_Should_Serialize_To_Json_Array() + public void SettingsSchema_Tags_Should_Serialize_To_Json_Array() { // Arrange - var agent = new Agent + var settings = new SettingsSchema { Tags = new List { "production", "voice-bot", "customer-service" } }; // Act - var result = agent.ToString(); + var result = settings.ToString(); // Assert using (new AssertionScope()) @@ -466,16 +466,16 @@ public void Agent_Tags_Should_Serialize_To_Json_Array() } [Test] - public void Agent_Tags_Empty_List_Should_Serialize_As_Empty_Array() + public void SettingsSchema_Tags_Empty_List_Should_Serialize_As_Empty_Array() { // Arrange - var agent = new Agent + var settings = new SettingsSchema { Tags = new List() }; // Act - var result = agent.ToString(); + var result = settings.ToString(); // Assert using (new AssertionScope()) @@ -493,16 +493,16 @@ public void Agent_Tags_Empty_List_Should_Serialize_As_Empty_Array() } [Test] - public void Agent_Tags_Null_Should_Not_Serialize() + public void SettingsSchema_Tags_Null_Should_Not_Serialize() { // Arrange - var agent = new Agent + var settings = new SettingsSchema { Tags = null }; // Act - var result = agent.ToString(); + var result = settings.ToString(); // Assert using (new AssertionScope()) @@ -517,33 +517,30 @@ public void Agent_Tags_Null_Should_Not_Serialize() } [Test] - public void Agent_With_Tags_Should_Serialize_With_Other_Properties() + public void SettingsSchema_With_Tags_Should_Serialize_With_Other_Properties() { // Arrange - var agent = new Agent + var settings = new SettingsSchema { - Language = "en", - Greeting = "Hello, I'm your agent", - Tags = new List { "test-tag", "integration" }, - MipOptOut = true + Experimental = true, + MipOptOut = true, + Tags = new List { "test-tag", "integration" } }; // Act - var result = agent.ToString(); + var result = settings.ToString(); // Assert using (new AssertionScope()) { result.Should().NotBeNull(); - result.Should().Contain("language"); - result.Should().Contain("greeting"); - result.Should().Contain("tags"); + result.Should().Contain("experimental"); result.Should().Contain("mip_opt_out"); + result.Should().Contain("tags"); // Verify it's valid JSON by parsing it var parsed = JsonDocument.Parse(result); - parsed.RootElement.GetProperty("language").GetString().Should().Be("en"); - parsed.RootElement.GetProperty("greeting").GetString().Should().Be("Hello, I'm your agent"); + parsed.RootElement.GetProperty("experimental").GetBoolean().Should().BeTrue(); parsed.RootElement.GetProperty("mip_opt_out").GetBoolean().Should().BeTrue(); var tagsArray = parsed.RootElement.GetProperty("tags"); @@ -553,16 +550,16 @@ public void Agent_With_Tags_Should_Serialize_With_Other_Properties() } [Test] - public void Agent_Tags_Should_Support_Special_Characters() + public void SettingsSchema_Tags_Should_Support_Special_Characters() { // Arrange - var agent = new Agent + var settings = new SettingsSchema { Tags = new List { "test-with-dashes", "test_with_underscores", "test with spaces", "test.with.dots" } }; // Act - var result = agent.ToString(); + var result = settings.ToString(); // Assert using (new AssertionScope()) @@ -588,17 +585,17 @@ public void Agent_Tags_Should_Support_Special_Characters() } [Test] - public void Agent_Tags_Schema_Should_Match_API_Specification() + public void SettingsSchema_Tags_Schema_Should_Match_API_Specification() { // Arrange - Test various scenarios as per API specification - var agentWithTags = new Agent { Tags = new List { "search-filter", "analytics", "production" } }; - var agentWithoutTags = new Agent { Tags = null }; - var agentWithEmptyTags = new Agent { Tags = new List() }; + var settingsWithTags = new SettingsSchema { Tags = new List { "search-filter", "analytics", "production" } }; + var settingsWithoutTags = new SettingsSchema { Tags = null }; + var settingsWithEmptyTags = new SettingsSchema { Tags = new List() }; // Act - var withTagsResult = agentWithTags.ToString(); - var withoutTagsResult = agentWithoutTags.ToString(); - var emptyTagsResult = agentWithEmptyTags.ToString(); + var withTagsResult = settingsWithTags.ToString(); + var withoutTagsResult = settingsWithoutTags.ToString(); + var emptyTagsResult = settingsWithEmptyTags.ToString(); // Assert using (new AssertionScope()) @@ -621,5 +618,30 @@ public void Agent_Tags_Schema_Should_Match_API_Specification() } } + [Test] + public void Agent_Should_Not_Have_Tags_Property() + { + // Arrange + var agent = new Agent + { + Language = "en", + Greeting = "Hello, I'm your agent" + }; + + // Act + var result = agent.ToString(); + + // Assert + using (new AssertionScope()) + { + result.Should().NotBeNull(); + result.Should().NotContain("tags"); + + // Verify it's valid JSON by parsing it + var parsed = JsonDocument.Parse(result); + parsed.RootElement.TryGetProperty("tags", out _).Should().BeFalse(); + } + } + #endregion } \ No newline at end of file diff --git a/Deepgram/Models/Agent/v2/WebSocket/Agent.cs b/Deepgram/Models/Agent/v2/WebSocket/Agent.cs index 47a0b71..0004ee1 100644 --- a/Deepgram/Models/Agent/v2/WebSocket/Agent.cs +++ b/Deepgram/Models/Agent/v2/WebSocket/Agent.cs @@ -25,13 +25,6 @@ public record Agent [JsonPropertyName("speak")] public Speak Speak { get; set; } = new Speak(); - /// - /// Tags to associate with the request. - /// - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - [JsonPropertyName("tags")] - public List? Tags { get; set; } - /// /// The message to speak at the start of the connection. /// diff --git a/Deepgram/Models/Agent/v2/WebSocket/Settings.cs b/Deepgram/Models/Agent/v2/WebSocket/Settings.cs index cbb8449..8c33cdc 100644 --- a/Deepgram/Models/Agent/v2/WebSocket/Settings.cs +++ b/Deepgram/Models/Agent/v2/WebSocket/Settings.cs @@ -20,6 +20,13 @@ public class SettingsSchema [JsonPropertyName("experimental")] public bool? Experimental { get; set; } + /// + /// Tags to associate with the request. + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("tags")] + public List? Tags { get; set; } + /// /// To opt out of Deepgram Model Improvement Program /// From 1c265a78db491e763981556276d41762044ff7da Mon Sep 17 00:00:00 2001 From: John Vajda Date: Sat, 2 Aug 2025 11:17:33 -0600 Subject: [PATCH 3/5] fixes agent example + test tags --- examples/agent/websocket/simple/Program.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/agent/websocket/simple/Program.cs b/examples/agent/websocket/simple/Program.cs index 35fe317..ef1e675 100644 --- a/examples/agent/websocket/simple/Program.cs +++ b/examples/agent/websocket/simple/Program.cs @@ -217,6 +217,11 @@ await agentClient.Subscribe(new EventHandler((sender, e) => settingsConfiguration.Agent.Listen.Provider.Type = "deepgram"; settingsConfiguration.Agent.Listen.Provider.Model = "nova-3"; settingsConfiguration.Agent.Listen.Provider.Keyterms = new List { "Deepgram" }; + settingsConfiguration.Agent.Speak.Provider.Type = "deepgram"; + settingsConfiguration.Agent.Speak.Provider.Model = "aura-2-thalia-en"; + + // Add tags to test the new tagging capabilities + settingsConfiguration.Tags = new List { "dotnet-example","live-agent-test" }; // To avoid issues with empty objects, Voice and Endpoint are instantiated as null. Construct them as needed. // settingsConfiguration.Agent.Speak.Provider.Voice = new CartesiaVoice(); From 0aaeadb4f10c062648bfbd5b839621a63df8d59d Mon Sep 17 00:00:00 2001 From: John Vajda Date: Sat, 2 Aug 2025 11:45:27 -0600 Subject: [PATCH 4/5] fixes agent example + test tags --- examples/agent/websocket/simple/Program.cs | 270 ++++++++++++++------- 1 file changed, 179 insertions(+), 91 deletions(-) diff --git a/examples/agent/websocket/simple/Program.cs b/examples/agent/websocket/simple/Program.cs index ef1e675..949f249 100644 --- a/examples/agent/websocket/simple/Program.cs +++ b/examples/agent/websocket/simple/Program.cs @@ -7,6 +7,7 @@ using Deepgram.Models.Authenticate.v1; using Deepgram.Models.Agent.v2.WebSocket; using System.Collections.Generic; +using System.Runtime.InteropServices; using PortAudioSharp; namespace SampleApp @@ -55,107 +56,38 @@ static async Task Main(string[] args) DeepgramWsClientOptions options = new DeepgramWsClientOptions(null, null, true); var agentClient = ClientFactory.CreateAgentWebSocketClient(apiKey: "", options: options); - // current time - var lastAudioTime = DateTime.Now; - var audioFileCount = 0; + // Initialize conversation + Console.WriteLine("🎤 Ready for conversation! Speak into your microphone..."); // Subscribe to the EventResponseReceived event await agentClient.Subscribe(new EventHandler((sender, e) => { Console.WriteLine($"----> {e.Type} received"); })); - await agentClient.Subscribe(new EventHandler((sender, e) => + await agentClient.Subscribe(new EventHandler((sender, e) => { Console.WriteLine($"----> {e.Type} received"); - // if the last audio response is more than 5 seconds ago, add a wav header - if (DateTime.Now.Subtract(lastAudioTime).TotalSeconds > 7) + if (e.Stream != null && e.Stream.Length > 0) { - audioFileCount = audioFileCount + 1; // increment the audio file count + var audioData = e.Stream.ToArray(); + Console.WriteLine($"🔊 Queueing {audioData.Length} bytes of agent speech for playback"); - // delete the file if it exists - if (File.Exists($"output_{audioFileCount}.wav")) - { - File.Delete($"output_{audioFileCount}.wav"); - } - - using (BinaryWriter writer = new BinaryWriter(File.Open($"output_{audioFileCount}.wav", FileMode.Append))) - { - Console.WriteLine("Adding WAV header to output.wav"); - byte[] wavHeader = new byte[44]; - int sampleRate = 48000; - short bitsPerSample = 16; - short channels = 1; - int byteRate = sampleRate * channels * (bitsPerSample / 8); - short blockAlign = (short)(channels * (bitsPerSample / 8)); - - wavHeader[0] = 0x52; // R - wavHeader[1] = 0x49; // I - wavHeader[2] = 0x46; // F - wavHeader[3] = 0x46; // F - wavHeader[4] = 0x00; // Placeholder for file size (will be updated later) - wavHeader[5] = 0x00; // Placeholder for file size (will be updated later) - wavHeader[6] = 0x00; // Placeholder for file size (will be updated later) - wavHeader[7] = 0x00; // Placeholder for file size (will be updated later) - wavHeader[8] = 0x57; // W - wavHeader[9] = 0x41; // A - wavHeader[10] = 0x56; // V - wavHeader[11] = 0x45; // E - wavHeader[12] = 0x66; // f - wavHeader[13] = 0x6D; // m - wavHeader[14] = 0x74; // t - wavHeader[15] = 0x20; // Space - wavHeader[16] = 0x10; // Subchunk1Size (16 for PCM) - wavHeader[17] = 0x00; // Subchunk1Size - wavHeader[18] = 0x00; // Subchunk1Size - wavHeader[19] = 0x00; // Subchunk1Size - wavHeader[20] = 0x01; // AudioFormat (1 for PCM) - wavHeader[21] = 0x00; // AudioFormat - wavHeader[22] = (byte)channels; // NumChannels - wavHeader[23] = 0x00; // NumChannels - wavHeader[24] = (byte)(sampleRate & 0xFF); // SampleRate - wavHeader[25] = (byte)((sampleRate >> 8) & 0xFF); // SampleRate - wavHeader[26] = (byte)((sampleRate >> 16) & 0xFF); // SampleRate - wavHeader[27] = (byte)((sampleRate >> 24) & 0xFF); // SampleRate - wavHeader[28] = (byte)(byteRate & 0xFF); // ByteRate - wavHeader[29] = (byte)((byteRate >> 8) & 0xFF); // ByteRate - wavHeader[30] = (byte)((byteRate >> 16) & 0xFF); // ByteRate - wavHeader[31] = (byte)((byteRate >> 24) & 0xFF); // ByteRate - wavHeader[32] = (byte)blockAlign; // BlockAlign - wavHeader[33] = 0x00; // BlockAlign - wavHeader[34] = (byte)bitsPerSample; // BitsPerSample - wavHeader[35] = 0x00; // BitsPerSample - wavHeader[36] = 0x64; // d - wavHeader[37] = 0x61; // a - wavHeader[38] = 0x74; // t - wavHeader[39] = 0x61; // a - wavHeader[40] = 0x00; // Placeholder for data chunk size (will be updated later) - wavHeader[41] = 0x00; // Placeholder for data chunk size (will be updated later) - wavHeader[42] = 0x00; // Placeholder for data chunk size (will be updated later) - wavHeader[43] = 0x00; // Placeholder for data chunk size (will be updated later) - - writer.Write(wavHeader); - } + // Play audio through speakers + PlayAudioThroughSpeakers(audioData); } - - if (e.Stream != null) + else { - using (BinaryWriter writer = new BinaryWriter(File.Open($"output_{audioFileCount}.wav", FileMode.Append))) - { - writer.Write(e.Stream.ToArray()); - } + Console.WriteLine($"⚠️ Received empty audio stream"); } - - // record the last audio time - lastAudioTime = DateTime.Now; })); - await agentClient.Subscribe(new EventHandler((sender, e) => + await agentClient.Subscribe(new EventHandler((sender, e) => { - Console.WriteLine($"----> {e} received"); + Console.WriteLine($"----> {e} received - Agent finished speaking 🎤"); })); await agentClient.Subscribe(new EventHandler((sender, e) => { - Console.WriteLine($"----> {e} received"); + Console.WriteLine($"----> {e} received - Agent is speaking 🗣️"); })); await agentClient.Subscribe(new EventHandler((sender, e) => { @@ -171,7 +103,7 @@ await agentClient.Subscribe(new EventHandler((sende })); await agentClient.Subscribe(new EventHandler((sender, e) => { - Console.WriteLine($"----> {e} received"); + Console.WriteLine($"----> {e} received - User is speaking 👤"); })); await agentClient.Subscribe(new EventHandler((sender, e) => { @@ -210,10 +142,15 @@ await agentClient.Subscribe(new EventHandler((sender, e) => var settingsConfiguration = new SettingsSchema(); settingsConfiguration.Agent.Think.Provider.Type = "open_ai"; settingsConfiguration.Agent.Think.Provider.Model = "gpt-4o-mini"; - settingsConfiguration.Audio.Output.SampleRate = 16000; - settingsConfiguration.Audio.Output.Container = "wav"; - settingsConfiguration.Audio.Input.SampleRate = 44100; - settingsConfiguration.Agent.Greeting = "Hello, how can I help you today?"; + + // Configure audio settings - keep your input format, fix output + settingsConfiguration.Audio.Input.Encoding = "linear16"; + settingsConfiguration.Audio.Input.SampleRate = 24000; + settingsConfiguration.Audio.Output.Encoding = "linear16"; // Use linear16 for output too + settingsConfiguration.Audio.Output.SampleRate = 24000; + settingsConfiguration.Audio.Output.Container = "none"; + + settingsConfiguration.Agent.Greeting = "Hello! How can I help you today?"; settingsConfiguration.Agent.Listen.Provider.Type = "deepgram"; settingsConfiguration.Agent.Listen.Provider.Model = "nova-3"; settingsConfiguration.Agent.Listen.Provider.Keyterms = new List { "Deepgram" }; @@ -236,18 +173,42 @@ await agentClient.Subscribe(new EventHandler((sender, e) => return; } - // Microphone streaming + // Microphone streaming with debugging Console.WriteLine("Starting microphone..."); Microphone microphone = null; - try + int audioDataCounter = 0; + + try { - microphone = new Microphone(agentClient.SendBinary); + // Create microphone with proper sample rate and debugging + microphone = new Microphone( + push_callback: (audioData, length) => + { + audioDataCounter++; + Console.WriteLine($"[MIC] Captured audio chunk #{audioDataCounter}: {length} bytes"); + + // Create array with actual length + byte[] actualData = new byte[length]; + Array.Copy(audioData, actualData, length); + + // Send to agent + agentClient.SendBinary(actualData); + }, + rate: 24000, // Match the agent's expected input rate (24kHz) + chunkSize: 8192, // Standard chunk size + channels: 1, // Mono + device_index: PortAudio.DefaultInputDevice, + format: SampleFormat.Int16 + ); + microphone.Start(); - Console.WriteLine("Microphone started successfully. Waiting for audio input..."); + Console.WriteLine("Microphone started successfully. Speak into your microphone now!"); + Console.WriteLine("You should see '[MIC] Captured audio chunk' messages when speaking..."); } catch (Exception ex) { Console.WriteLine($"Error starting microphone: {ex.Message}"); + Console.WriteLine($"Stack trace: {ex.StackTrace}"); return; } @@ -271,6 +232,133 @@ await agentClient.Subscribe(new EventHandler((sender, e) => { Console.WriteLine($"Exception: {ex.Message}"); } + } + + // Audio playback queue and position tracking + private static Queue audioQueue = new Queue(); + private static byte[]? currentAudioBuffer = null; + private static int audioPosition = 0; + private static readonly object audioLock = new object(); + + /// + /// Plays audio data through the system's default output device (speakers) + /// + /// PCM audio data to play + static void PlayAudioThroughSpeakers(byte[] audioData) + { + try + { + lock (audioLock) + { + // Add to queue for playback + audioQueue.Enqueue(audioData); + } + + // Start playback stream if not already running + StartAudioPlayback(); + } + catch (Exception ex) + { + Console.WriteLine($"❌ Error queuing audio: {ex.Message}"); + } + } + + private static PortAudioSharp.Stream? _outputStream = null; + + private static void StartAudioPlayback() + { + if (_outputStream != null) + return; // Already playing + + try + { + // Get default output device + int outputDevice = PortAudio.DefaultOutputDevice; + if (outputDevice == PortAudio.NoDevice) + { + Console.WriteLine("⚠️ No default output device found for audio playback"); + return; + } + + var deviceInfo = PortAudio.GetDeviceInfo(outputDevice); + Console.WriteLine($"🔊 Playing through: {deviceInfo.name}"); + + // Set up output stream parameters + var outputParams = new PortAudioSharp.StreamParameters + { + device = outputDevice, + channelCount = 1, // mono + sampleFormat = PortAudioSharp.SampleFormat.Int16, + suggestedLatency = deviceInfo.defaultLowOutputLatency, + hostApiSpecificStreamInfo = IntPtr.Zero + }; + + // Create and start the output stream + _outputStream = new PortAudioSharp.Stream( + inParams: null, + outParams: outputParams, + sampleRate: 24000, // Match agent output (24kHz) + framesPerBuffer: 512, + streamFlags: PortAudioSharp.StreamFlags.ClipOff, + callback: OutputCallback, + userData: IntPtr.Zero + ); + + _outputStream.Start(); + } + catch (Exception ex) + { + Console.WriteLine($"❌ Error starting audio playback: {ex.Message}"); + _outputStream = null; + } + } + + private static PortAudioSharp.StreamCallbackResult OutputCallback(nint input, nint output, uint frameCount, ref PortAudioSharp.StreamCallbackTimeInfo timeInfo, PortAudioSharp.StreamCallbackFlags statusFlags, nint userDataPtr) + { + lock (audioLock) + { + int bytesToWrite = (int)(frameCount * sizeof(Int16)); // 16-bit samples + byte[] outputBuffer = new byte[bytesToWrite]; + + int bytesWritten = 0; + while (bytesWritten < bytesToWrite) + { + // Get next buffer if current one is exhausted + if (currentAudioBuffer == null || audioPosition >= currentAudioBuffer.Length) + { + if (audioQueue.Count > 0) + { + currentAudioBuffer = audioQueue.Dequeue(); + audioPosition = 0; + Console.WriteLine($"🔊 Playing new audio buffer: {currentAudioBuffer.Length} bytes (Queue: {audioQueue.Count} remaining)"); + } + else + { + // No more audio, fill with silence but KEEP stream running for next audio + for (int i = bytesWritten; i < bytesToWrite; i++) + outputBuffer[i] = 0; + + Marshal.Copy(outputBuffer, 0, output, bytesToWrite); + // DON'T stop the stream - keep it running for next conversation + return PortAudioSharp.StreamCallbackResult.Continue; + } + } + + // Copy data from current buffer + int remainingInBuffer = currentAudioBuffer.Length - audioPosition; + int remainingToWrite = bytesToWrite - bytesWritten; + int bytesToCopy = Math.Min(remainingInBuffer, remainingToWrite); + + Array.Copy(currentAudioBuffer, audioPosition, outputBuffer, bytesWritten, bytesToCopy); + audioPosition += bytesToCopy; + bytesWritten += bytesToCopy; + } + + // Copy to output + Marshal.Copy(outputBuffer, 0, output, bytesToWrite); + } + + return PortAudioSharp.StreamCallbackResult.Continue; } } } From 35556e8cb5dc2f42abaeeede95667bb6985c9709 Mon Sep 17 00:00:00 2001 From: John Vajda Date: Mon, 4 Aug 2025 16:16:22 -0600 Subject: [PATCH 5/5] code rabbit feedback --- examples/agent/websocket/simple/Program.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/agent/websocket/simple/Program.cs b/examples/agent/websocket/simple/Program.cs index 949f249..c78f354 100644 --- a/examples/agent/websocket/simple/Program.cs +++ b/examples/agent/websocket/simple/Program.cs @@ -224,6 +224,14 @@ await agentClient.Subscribe(new EventHandler((sender, e) => // Stop the connection await agentClient.Stop(); + // Stop and dispose PortAudio output stream + if (_outputStream != null) + { + _outputStream.Stop(); + _outputStream.Dispose(); + _outputStream = null; + } + // Terminate Libraries Deepgram.Microphone.Library.Terminate(); Deepgram.Library.Terminate();