diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/CompatibilitySuppressions.xml b/src/Libraries/Microsoft.Extensions.AI.Abstractions/CompatibilitySuppressions.xml index 106feff432e..98d238db9ad 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/CompatibilitySuppressions.xml +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/CompatibilitySuppressions.xml @@ -805,4 +805,144 @@ lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll true + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net10.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + 
lib/net462/Microsoft.Extensions.AI.Abstractions.dll + lib/net462/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net8.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + lib/net9.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_ModelId + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.get_SpeechLanguage + 
lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_ModelId(System.String) + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + true + + + CP0002 + M:Microsoft.Extensions.AI.SpeechToTextOptions.set_SpeechLanguage(System.String) + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + lib/netstandard2.0/Microsoft.Extensions.AI.Abstractions.dll + true + \ No newline at end of file diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/CreateConversationItemRealtimeClientMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/CreateConversationItemRealtimeClientMessage.cs new file mode 100644 index 00000000000..0f1f245e00c --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/CreateConversationItemRealtimeClientMessage.cs @@ -0,0 +1,29 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time message for creating a conversation item. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class CreateConversationItemRealtimeClientMessage : RealtimeClientMessage +{ + /// + /// Initializes a new instance of the class. + /// + /// The conversation item to create. + public CreateConversationItemRealtimeClientMessage(RealtimeConversationItem item) + { + Item = Throw.IfNull(item); + } + + /// + /// Gets or sets the conversation item to create. 
+ /// + public RealtimeConversationItem Item { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/CreateResponseRealtimeClientMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/CreateResponseRealtimeClientMessage.cs new file mode 100644 index 00000000000..4ee2bc36cb1 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/CreateResponseRealtimeClientMessage.cs @@ -0,0 +1,125 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a client message that triggers model inference to generate a response. +/// +/// +/// +/// Sending this message instructs the provider to generate a new response from the model. +/// The response may include one or more output items (text, audio, or tool calls). +/// Properties on this message optionally override the session-level configuration +/// for this response only. +/// +/// +/// Not all providers support explicit response triggering. Voice-activity-detection (VAD) driven +/// providers may respond automatically when speech is detected or input is committed, in which case +/// this message may be treated as a no-op. Per-response overrides (instructions, tools, voice, etc.) +/// are advisory and may be silently ignored by providers that do not support them. +/// +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class CreateResponseRealtimeClientMessage : RealtimeClientMessage +{ + /// + /// Initializes a new instance of the class. + /// + public CreateResponseRealtimeClientMessage() + { + } + + /// + /// Gets or sets the list of the conversation items to create a response for. + /// + public IList? 
Items { get; set; } + + /// + /// Gets or sets the output audio options for the response. + /// + /// + /// If set, overrides the session-level audio output configuration for this response only. + /// If , the session's default audio options are used. + /// + public RealtimeAudioFormat? OutputAudioOptions { get; set; } + + /// + /// Gets or sets the voice of the output audio. + /// + /// + /// If set, overrides the session-level voice for this response only. + /// If , the session's default voice is used. + /// + public string? OutputVoice { get; set; } + + /// + /// Gets or sets a value indicating whether the response output should be excluded from the conversation context. + /// + /// + /// When , the response is generated out-of-band: the model produces output + /// but the resulting items are not added to the conversation history, so they will not appear + /// as context for subsequent responses. + /// If , the provider's default behavior is used. + /// + public bool? ExcludeFromConversation { get; set; } + + /// + /// Gets or sets the instructions that guide the model on desired responses. + /// + /// + /// If set, overrides the session-level instructions for this response only. + /// If , the session's default instructions are used. + /// + public string? Instructions { get; set; } + + /// + /// Gets or sets the maximum number of output tokens for the response, inclusive of all modalities and tool calls. + /// + /// + /// This limit applies to the total output tokens regardless of modality (text, audio, etc.). + /// If , the provider's default limit is used. + /// + public int? MaxOutputTokens { get; set; } + + /// + /// Gets or sets any additional properties associated with the response request. + /// + /// + /// This can be used to attach arbitrary key-value metadata to a response request + /// for tracking or disambiguation purposes (e.g., correlating multiple simultaneous responses). + /// Providers may map this to their own metadata fields. 
+ /// + public AdditionalPropertiesDictionary? AdditionalProperties { get; set; } + + /// + /// Gets or sets the output modalities for the response (e.g., "text", "audio"). + /// + /// + /// If set, overrides the session-level output modalities for this response only. + /// If , the session's default modalities are used. + /// + public IList? OutputModalities { get; set; } + + /// + /// Gets or sets the tool choice mode for the response. + /// + /// + /// If set, overrides the session-level tool choice for this response only. + /// If , the session's default tool choice is used. + /// + public ChatToolMode? ToolMode { get; set; } + + /// + /// Gets or sets the AI tools available for generating the response. + /// + /// + /// If set, overrides the session-level tools for this response only. + /// If , the session's default tools are used. + /// + public IList? Tools { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/DelegatingRealtimeClient.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/DelegatingRealtimeClient.cs new file mode 100644 index 00000000000..217f0851264 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/DelegatingRealtimeClient.cs @@ -0,0 +1,68 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Provides an optional base class for an that passes through calls to another instance. +/// +/// +/// This is recommended as a base type when building clients that can be chained around an underlying . +/// The default implementation simply passes each call to the inner client instance. 
+/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class DelegatingRealtimeClient : IRealtimeClient +{ + /// + /// Initializes a new instance of the class. + /// + /// The wrapped client instance. + /// is . + protected DelegatingRealtimeClient(IRealtimeClient innerClient) + { + InnerClient = Throw.IfNull(innerClient); + } + + /// + public void Dispose() + { + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + + /// Gets the inner . + protected IRealtimeClient InnerClient { get; } + + /// + public virtual Task CreateSessionAsync( + RealtimeSessionOptions? options = null, CancellationToken cancellationToken = default) => + InnerClient.CreateSessionAsync(options, cancellationToken); + + /// + public virtual object? GetService(Type serviceType, object? serviceKey = null) + { + _ = Throw.IfNull(serviceType); + + // If the key is non-null, we don't know what it means so pass through to the inner service. + return + serviceKey is null && serviceType.IsInstanceOfType(this) ? this : + InnerClient.GetService(serviceType, serviceKey); + } + + /// Provides a mechanism for releasing unmanaged resources. + /// if being called from ; otherwise, . + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + InnerClient.Dispose(); + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ErrorRealtimeServerMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ErrorRealtimeServerMessage.cs new file mode 100644 index 00000000000..8a606f53a82 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ErrorRealtimeServerMessage.cs @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time server error message. +/// +/// +/// Used with the . +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class ErrorRealtimeServerMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the class. + /// + public ErrorRealtimeServerMessage() + { + Type = RealtimeServerMessageType.Error; + } + + /// + /// Gets or sets the error content associated with the error message. + /// + public ErrorContent? Error { get; set; } + + /// + /// Gets or sets the ID of the client message that caused the error. + /// + /// + /// Unlike , which identifies this server message itself, + /// this property identifies the originating client message that triggered the error. + /// + public string? OriginatingMessageId { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeClient.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeClient.cs new file mode 100644 index 00000000000..5ae142326f1 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeClient.cs @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// Represents a real-time client. +/// This interface provides methods to create and manage real-time sessions. +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public interface IRealtimeClient : IDisposable +{ + /// Creates a new real-time session with the specified options. + /// The session options. 
+ /// A token to cancel the operation. + /// The created real-time session. + Task CreateSessionAsync(RealtimeSessionOptions? options = null, CancellationToken cancellationToken = default); + + /// Asks the for an object of the specified type . + /// The type of object being requested. + /// An optional key that can be used to help identify the target service. + /// The found object, otherwise . + /// is . + /// + /// The purpose of this method is to allow for the retrieval of strongly typed services that might be provided by the , + /// including itself or any services it might be wrapping. + /// + object? GetService(Type serviceType, object? serviceKey = null); +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeClientSession.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeClientSession.cs new file mode 100644 index 00000000000..0240262c513 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/IRealtimeClientSession.cs @@ -0,0 +1,53 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// Represents a real-time session. +/// This interface provides methods to manage a real-time session and to interact with the real-time model. +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public interface IRealtimeClientSession : IAsyncDisposable +{ + /// + /// Gets the current session options. + /// + RealtimeSessionOptions? Options { get; } + + /// + /// Sends a client message to the session. + /// + /// The client message to send. + /// A token to cancel the operation. + /// A task that represents the asynchronous send operation. 
+ /// + /// This method allows for sending client messages to the session at any time, which can be used to influence the session's behavior or state. + /// + Task SendAsync(RealtimeClientMessage message, CancellationToken cancellationToken = default); + + /// Streams the response from the real-time session. + /// A token to cancel the operation. + /// The response messages generated by the session. + /// + /// This method cannot be called multiple times concurrently on the same session instance. + /// + IAsyncEnumerable GetStreamingResponseAsync( + CancellationToken cancellationToken = default); + + /// Asks the for an object of the specified type . + /// The type of object being requested. + /// An optional key that can be used to help identify the target service. + /// The found object, otherwise . + /// is . + /// + /// The purpose of this method is to allow for the retrieval of strongly typed services that might be provided by the , + /// including itself or any services it might be wrapping. + /// + object? GetService(Type serviceType, object? serviceKey = null); +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/InputAudioBufferAppendRealtimeClientMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/InputAudioBufferAppendRealtimeClientMessage.cs new file mode 100644 index 00000000000..1f20903fb74 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/InputAudioBufferAppendRealtimeClientMessage.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time message for appending audio buffer input. 
+/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class InputAudioBufferAppendRealtimeClientMessage : RealtimeClientMessage +{ + private DataContent _content; + + /// + /// Initializes a new instance of the class. + /// + /// The data content containing the audio buffer data to append. + public InputAudioBufferAppendRealtimeClientMessage(DataContent audioContent) + { + _content = Throw.IfNull(audioContent); + } + + /// + /// Gets or sets the audio content to append to the model audio buffer. + /// + /// + /// The content should include the audio buffer data that needs to be appended to the input audio buffer. + /// + public DataContent Content + { + get => _content; + set => _content = Throw.IfNull(value); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/InputAudioBufferCommitRealtimeClientMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/InputAudioBufferCommitRealtimeClientMessage.cs new file mode 100644 index 00000000000..427fbda5ca9 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/InputAudioBufferCommitRealtimeClientMessage.cs @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time message for committing audio buffer input. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class InputAudioBufferCommitRealtimeClientMessage : RealtimeClientMessage +{ + /// + /// Initializes a new instance of the class. 
+ /// + public InputAudioBufferCommitRealtimeClientMessage() + { + } +} + diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/InputAudioTranscriptionRealtimeServerMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/InputAudioTranscriptionRealtimeServerMessage.cs new file mode 100644 index 00000000000..c50d7c8240f --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/InputAudioTranscriptionRealtimeServerMessage.cs @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time server message for input audio transcription. +/// +/// +/// Used for messages of type InputAudioTranscriptionCompleted, InputAudioTranscriptionDelta, or InputAudioTranscriptionFailed. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class InputAudioTranscriptionRealtimeServerMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the class. + /// + /// The type of the real-time server response. + /// + /// The parameter should be InputAudioTranscriptionCompleted, InputAudioTranscriptionDelta, or InputAudioTranscriptionFailed. + /// + public InputAudioTranscriptionRealtimeServerMessage(RealtimeServerMessageType type) + { + Type = type; + } + + /// + /// Gets or sets the index of the content part containing the audio. + /// + public int? ContentIndex { get; set; } + + /// + /// Gets or sets the ID of the item containing the audio that is being transcribed. + /// + public string? ItemId { get; set; } + + /// + /// Gets or sets the transcription text of the audio. + /// + public string? 
Transcription { get; set; } + + /// + /// Gets or sets the transcription-specific usage, which is billed separately from the realtime model. + /// + /// + /// This usage reflects the cost of the speech-to-text transcription and is billed according to the + /// ASR (Automatic Speech Recognition) model's pricing rather than the realtime model's pricing. + /// + public UsageDetails? Usage { get; set; } + + /// + /// Gets or sets the error content if an error occurred during transcription. + /// + public ErrorContent? Error { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/OutputTextAudioRealtimeServerMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/OutputTextAudioRealtimeServerMessage.cs new file mode 100644 index 00000000000..37861c4f76e --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/OutputTextAudioRealtimeServerMessage.cs @@ -0,0 +1,73 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time server message for output text and audio. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class OutputTextAudioRealtimeServerMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the class for handling output text delta responses. + /// + /// The type of the real-time server response. + /// + /// The should be , , + /// , , + /// , or . + /// + public OutputTextAudioRealtimeServerMessage(RealtimeServerMessageType type) + { + Type = type; + } + + /// + /// Gets or sets the index of the content part whose text has been updated. + /// + public int? ContentIndex { get; set; } + + /// + /// Gets or sets the text delta or final text content. 
+ /// + /// + /// Populated for , , + /// , and messages. + /// For audio messages ( and ), + /// use instead. + /// + public string? Text { get; set; } + + /// + /// Gets or sets the Base64-encoded audio data delta or final audio content. + /// + /// + /// Populated for messages. + /// For , this is typically + /// as the final audio is not included; use the accumulated deltas instead. + /// For text content, use instead. + /// + public string? Audio { get; set; } + + /// + /// Gets or sets the ID of the item containing the content part whose text has been updated. + /// + public string? ItemId { get; set; } + + /// + /// Gets or sets the index of the output item in the response. + /// + public int? OutputIndex { get; set; } + + /// + /// Gets or sets the ID of the response. + /// + /// + /// May be for providers that do not natively track response lifecycle. + /// + public string? ResponseId { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeAudioFormat.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeAudioFormat.cs new file mode 100644 index 00000000000..c8684185268 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeAudioFormat.cs @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents options for configuring real-time audio. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeAudioFormat +{ + /// + /// Initializes a new instance of the class. 
+ /// + public RealtimeAudioFormat(string mediaType, int sampleRate) + { + MediaType = mediaType; + SampleRate = sampleRate; + } + + /// + /// Gets the media type of the audio (e.g., "audio/pcm", "audio/pcmu", "audio/pcma"). + /// + public string MediaType { get; init; } + + /// + /// Gets the sample rate of the audio in Hertz. + /// + public int SampleRate { get; init; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientMessage.cs new file mode 100644 index 00000000000..0f035933462 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeClientMessage.cs @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time message the client sends to the model. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeClientMessage +{ + /// + /// Gets or sets the optional message ID associated with the message. + /// This can be used for tracking and correlation purposes. + /// + public string? MessageId { get; set; } + + /// + /// Gets or sets the raw representation of the message. + /// This can be used to send the raw data to the model. + /// + /// + /// The raw representation is typically used for custom or unsupported message types. + /// For example, the model may accept a JSON serialized message. + /// + public object? 
RawRepresentation { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeConversationItem.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeConversationItem.cs new file mode 100644 index 00000000000..7373a5d6773 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeConversationItem.cs @@ -0,0 +1,61 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time conversation item. +/// +/// +/// This class is used to encapsulate the details of a real-time item that can be inserted into a conversation, +/// or sent as part of a real-time response creation process. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeConversationItem +{ + /// + /// Initializes a new instance of the class. + /// + /// The contents of the conversation item. + /// The ID of the conversation item. + /// The role of the conversation item. + public RealtimeConversationItem(IList contents, string? id = null, ChatRole? role = null) + { + Id = id; + Role = role; + Contents = contents; + } + + /// + /// Gets or sets the ID of the conversation item. + /// + /// + /// This ID can be null when passing Function or MCP content, where the ID is not required. + /// The ID is only needed when the contents represent a user, system, or assistant message with contents such as text, audio, image, or similar. + /// + public string? Id { get; set; } + + /// + /// Gets or sets the role of the conversation item. + /// + /// + /// The role is not used in the case of Function or MCP content. 
+ /// The role is only needed when the contents represent a user, system, or assistant message with contents such as text, audio, image, or similar. + /// + public ChatRole? Role { get; set; } + + /// + /// Gets or sets the content of the conversation item. + /// + public IList Contents { get; set; } + + /// + /// Gets or sets the raw representation of the conversation item. + /// This can be used to hold the original data structure received from or sent to the provider. + /// + public object? RawRepresentation { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeResponseStatus.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeResponseStatus.cs new file mode 100644 index 00000000000..133bcabec79 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeResponseStatus.cs @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Defines well-known status values for real-time response lifecycle messages. +/// +/// +/// These constants represent the standard status values that may appear on +/// when the response completes +/// (i.e., on ). +/// Providers may use additional status values beyond those defined here. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public static class RealtimeResponseStatus +{ + /// + /// The response completed successfully. + /// + public const string Completed = "completed"; + + /// + /// The response was cancelled, typically due to an interruption such as user barge-in + /// (the user started speaking while the model was generating output). 
+ /// + public const string Cancelled = "cancelled"; + + /// + /// The response ended before completing, for example because the output reached + /// the maximum token limit. + /// + public const string Incomplete = "incomplete"; + + /// + /// The response failed due to an error. + /// + public const string Failed = "failed"; +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessage.cs new file mode 100644 index 00000000000..0e023fde4f4 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessage.cs @@ -0,0 +1,35 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time server response message. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeServerMessage +{ + /// + /// Gets or sets the type of the real-time response. + /// + public RealtimeServerMessageType Type { get; set; } + + /// + /// Gets or sets the optional message ID associated with the response. + /// This can be used for tracking and correlation purposes. + /// + public string? MessageId { get; set; } + + /// + /// Gets or sets the raw representation of the response. + /// This can be used to hold the original data structure received from the model. + /// + /// + /// The raw representation is typically used for custom or unsupported message types. + /// For example, the model may accept a JSON serialized server message. + /// + public object? 
RawRepresentation { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessageType.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessageType.cs new file mode 100644 index 00000000000..d7ceb0d52ca --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeServerMessageType.cs @@ -0,0 +1,156 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.ComponentModel; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents the type of a real-time server message. +/// This is used to identify the message type being received from the model. +/// +/// +/// +/// Well-known message types are provided as static properties. Providers may define additional +/// message types by constructing new instances with custom values. +/// +/// +/// Provider implementations that want to support the built-in middleware pipeline +/// ( and +/// ) must emit the following +/// message types at appropriate points during response generation: +/// +/// — when the model begins generating a new response. +/// — when the model has finished generating a response (with usage data if available). +/// — when a new output item (e.g., function call, message) is added during response generation. +/// — when an individual output item has completed. This is required for function invocation middleware to detect and invoke tool calls. 
+/// +/// +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +[JsonConverter(typeof(Converter))] +[DebuggerDisplay("{Value,nq}")] +public readonly struct RealtimeServerMessageType : IEquatable +{ + /// Gets a message type indicating that the response contains only raw content. + /// + /// This type supports extensibility for custom content types not natively supported by the SDK. + /// + public static RealtimeServerMessageType RawContentOnly { get; } = new("RawContentOnly"); + + /// Gets a message type indicating the output of audio transcription for user audio written to the user audio buffer. + public static RealtimeServerMessageType InputAudioTranscriptionCompleted { get; } = new("InputAudioTranscriptionCompleted"); + + /// Gets a message type indicating the text value of an input audio transcription content part is updated with incremental transcription results. + public static RealtimeServerMessageType InputAudioTranscriptionDelta { get; } = new("InputAudioTranscriptionDelta"); + + /// Gets a message type indicating that the audio transcription for user audio written to the user audio buffer has failed. + public static RealtimeServerMessageType InputAudioTranscriptionFailed { get; } = new("InputAudioTranscriptionFailed"); + + /// Gets a message type indicating the output text update with incremental results. + public static RealtimeServerMessageType OutputTextDelta { get; } = new("OutputTextDelta"); + + /// Gets a message type indicating the output text is complete. + public static RealtimeServerMessageType OutputTextDone { get; } = new("OutputTextDone"); + + /// Gets a message type indicating the model-generated transcription of audio output updated. + public static RealtimeServerMessageType OutputAudioTranscriptionDelta { get; } = new("OutputAudioTranscriptionDelta"); + + /// Gets a message type indicating the model-generated transcription of audio output is done streaming. 
+ public static RealtimeServerMessageType OutputAudioTranscriptionDone { get; } = new("OutputAudioTranscriptionDone"); + + /// Gets a message type indicating the audio output updated. + public static RealtimeServerMessageType OutputAudioDelta { get; } = new("OutputAudioDelta"); + + /// Gets a message type indicating the audio output is done streaming. + public static RealtimeServerMessageType OutputAudioDone { get; } = new("OutputAudioDone"); + + /// Gets a message type indicating the response has completed. + public static RealtimeServerMessageType ResponseDone { get; } = new("ResponseDone"); + + /// Gets a message type indicating the response has been created. + public static RealtimeServerMessageType ResponseCreated { get; } = new("ResponseCreated"); + + /// Gets a message type indicating an individual output item in the response has completed. + public static RealtimeServerMessageType ResponseOutputItemDone { get; } = new("ResponseOutputItemDone"); + + /// Gets a message type indicating an individual output item has been added to the response. + public static RealtimeServerMessageType ResponseOutputItemAdded { get; } = new("ResponseOutputItemAdded"); + + /// Gets a message type indicating an error occurred while processing the request. + public static RealtimeServerMessageType Error { get; } = new("Error"); + + /// + /// Gets the value associated with this . + /// + public string Value { get; } + + /// + /// Initializes a new instance of the struct with the provided value. + /// + /// The value to associate with this . + [JsonConstructor] + public RealtimeServerMessageType(string value) + { + Value = Throw.IfNullOrWhitespace(value); + } + + /// + /// Returns a value indicating whether two instances are equivalent, as determined by a + /// case-insensitive comparison of their values. + /// + /// The first instance to compare. + /// The second instance to compare. + /// if left and right have equivalent values; otherwise, . 
+ public static bool operator ==(RealtimeServerMessageType left, RealtimeServerMessageType right) + { + return left.Equals(right); + } + + /// + /// Returns a value indicating whether two instances are not equivalent, as determined by a + /// case-insensitive comparison of their values. + /// + /// The first instance to compare. + /// The second instance to compare. + /// if left and right have different values; otherwise, . + public static bool operator !=(RealtimeServerMessageType left, RealtimeServerMessageType right) + { + return !(left == right); + } + + /// + public override bool Equals([NotNullWhen(true)] object? obj) + => obj is RealtimeServerMessageType other && Equals(other); + + /// + public bool Equals(RealtimeServerMessageType other) + => string.Equals(Value, other.Value, StringComparison.OrdinalIgnoreCase); + + /// + public override int GetHashCode() + => Value is null ? 0 : StringComparer.OrdinalIgnoreCase.GetHashCode(Value); + + /// + public override string ToString() => Value ?? string.Empty; + + /// Provides a for serializing instances. + [EditorBrowsable(EditorBrowsableState.Never)] + public sealed class Converter : JsonConverter + { + /// + public override RealtimeServerMessageType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) => + new(reader.GetString()!); + + /// + public override void Write(Utf8JsonWriter writer, RealtimeServerMessageType value, JsonSerializerOptions options) => + Throw.IfNull(writer).WriteStringValue(value.Value); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionKind.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionKind.cs new file mode 100644 index 00000000000..c612ac08c5a --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionKind.cs @@ -0,0 +1,99 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.ComponentModel; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents the kind of a real-time session. +/// +/// +/// Well-known session kinds are provided as static properties. Providers may define additional +/// session kinds by constructing new instances with custom values. +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +[JsonConverter(typeof(Converter))] +[DebuggerDisplay("{Value,nq}")] +public readonly struct RealtimeSessionKind : IEquatable +{ + /// + /// Gets a session kind representing a conversational session which processes audio, text, or other media in real-time. + /// + public static RealtimeSessionKind Conversation { get; } = new("conversation"); + + /// + /// Gets a session kind representing a transcription-only session. + /// + public static RealtimeSessionKind Transcription { get; } = new("transcription"); + + /// Gets the value of the session kind. + public string Value { get; } + + /// Initializes a new instance of the struct with the provided value. + /// The value to associate with this . + [JsonConstructor] + public RealtimeSessionKind(string value) + { + Value = Throw.IfNullOrWhitespace(value); + } + + /// + /// Returns a value indicating whether two instances are equivalent, as determined by a + /// case-insensitive comparison of their values. + /// + /// The first instance to compare. + /// The second instance to compare. + /// if left and right have equivalent values; otherwise, . 
+ public static bool operator ==(RealtimeSessionKind left, RealtimeSessionKind right) + { + return left.Equals(right); + } + + /// + /// Returns a value indicating whether two instances are not equivalent, as determined by a + /// case-insensitive comparison of their values. + /// + /// The first instance to compare. + /// The second instance to compare. + /// if left and right have different values; otherwise, . + public static bool operator !=(RealtimeSessionKind left, RealtimeSessionKind right) + { + return !(left == right); + } + + /// + public override bool Equals([NotNullWhen(true)] object? obj) + => obj is RealtimeSessionKind other && Equals(other); + + /// + public bool Equals(RealtimeSessionKind other) + => string.Equals(Value, other.Value, StringComparison.OrdinalIgnoreCase); + + /// + public override int GetHashCode() + => Value is null ? 0 : StringComparer.OrdinalIgnoreCase.GetHashCode(Value); + + /// + public override string ToString() => Value ?? string.Empty; + + /// Provides a for serializing instances. + [EditorBrowsable(EditorBrowsableState.Never)] + public sealed class Converter : JsonConverter + { + /// + public override RealtimeSessionKind Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) => + new(reader.GetString()!); + + /// + public override void Write(Utf8JsonWriter writer, RealtimeSessionKind value, JsonSerializerOptions options) => + Throw.IfNull(writer).WriteStringValue(value.Value); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionOptions.cs new file mode 100644 index 00000000000..e4aa12e9a1c --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/RealtimeSessionOptions.cs @@ -0,0 +1,99 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json.Serialization; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// Represents options for configuring a real-time session. +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class RealtimeSessionOptions +{ + /// + /// Gets the session kind. + /// + /// + /// If set to , most of the session's properties will not apply to the session. Only InputAudioFormat and TranscriptionOptions will be used. + /// + public RealtimeSessionKind SessionKind { get; init; } = RealtimeSessionKind.Conversation; + + /// + /// Gets the model name to use for the session. + /// + public string? Model { get; init; } + + /// + /// Gets the input audio format for the session. + /// + public RealtimeAudioFormat? InputAudioFormat { get; init; } + + /// + /// Gets the transcription options for the session. + /// + public TranscriptionOptions? TranscriptionOptions { get; init; } + + /// + /// Gets the output audio format for the session. + /// + public RealtimeAudioFormat? OutputAudioFormat { get; init; } + + /// + /// Gets the output voice for the session. + /// + public string? Voice { get; init; } + + /// + /// Gets the default system instructions for the session. + /// + public string? Instructions { get; init; } + + /// + /// Gets the maximum number of response tokens for the session. + /// + public int? MaxOutputTokens { get; init; } + + /// + /// Gets the output modalities for the response, such as "text" or "audio". + /// If null, the default conversation modalities will be used. + /// + public IReadOnlyList? OutputModalities { get; init; } + + /// + /// Gets the tool choice mode for the session. + /// + public ChatToolMode? ToolMode { get; init; } + + /// + /// Gets the AI tools available for generating the response. + /// + public IReadOnlyList? 
Tools { get; init; } + + /// + /// Gets a callback responsible for creating the raw representation of the session options from an underlying implementation. + /// + /// + /// The underlying implementation might have its own representation of options. + /// When a is sent with a , + /// that implementation might convert the provided options into its own representation in order to use it while + /// performing the operation. For situations where a consumer knows which concrete + /// is being used and how it represents options, a new instance of that implementation-specific options type can be + /// returned by this callback for the implementation to use, instead of creating a + /// new instance. Such implementations might mutate the supplied options instance further based on other settings + /// supplied on this instance or from other inputs. + /// Therefore, it is strongly recommended to not return shared instances and instead make the callback return + /// a new instance on each call. + /// This is typically used to set an implementation-specific setting that isn't otherwise exposed from the strongly typed + /// properties on . + /// + /// Unlike similar factories on other options types, this callback does not receive the session instance + /// as a parameter because some providers need to evaluate it before the session is created + /// (e.g., to produce connection configuration). + /// + /// + [JsonIgnore] + public Func? RawRepresentationFactory { get; init; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ResponseCreatedRealtimeServerMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ResponseCreatedRealtimeServerMessage.cs new file mode 100644 index 00000000000..517b9f8dc04 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ResponseCreatedRealtimeServerMessage.cs @@ -0,0 +1,119 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time message for creating a response item. +/// +/// +/// +/// Used with the and messages. +/// +/// +/// Provider implementations should emit this message with +/// when the model begins generating a new response, and with +/// when the response is complete. The built-in middleware depends +/// on these messages for tracing response lifecycle. +/// +/// +/// Providers that do not natively support response lifecycle events (e.g., those that only stream content parts +/// and signal turn completion) should synthesize these messages to ensure correct middleware behavior. +/// In such cases, may be set to a synthetic value or left . +/// +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class ResponseCreatedRealtimeServerMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the class. + /// + /// + /// The should be or . + /// + public ResponseCreatedRealtimeServerMessage(RealtimeServerMessageType type) + { + Type = type; + } + + /// + /// Gets or sets the output audio options for the response. If null, the default conversation audio options will be used. + /// + public RealtimeAudioFormat? OutputAudioOptions { get; set; } + + /// + /// Gets or sets the voice of the output audio. + /// + public string? OutputVoice { get; set; } + + /// + /// Gets or sets the unique response ID. + /// + /// + /// Some providers (e.g., OpenAI) assign a unique ID to each response. Providers that do not + /// natively track response lifecycles may set this to or generate a synthetic ID. + /// Consumers should not assume this value correlates to a provider-specific concept. + /// + public string? 
ResponseId { get; set; } + + /// + /// Gets or sets the maximum number of output tokens for the response, inclusive of all modalities and tool calls. + /// + /// + /// This limit applies to the total output tokens regardless of modality (text, audio, etc.). + /// If , the provider's default limit was used. + /// + public int? MaxOutputTokens { get; set; } + + /// + /// Gets or sets any additional properties associated with the response. + /// + /// + /// Contains arbitrary key-value metadata attached to the response. + /// This is the metadata that was provided when the response was created + /// (e.g., for tracking or disambiguating multiple simultaneous responses). + /// + public AdditionalPropertiesDictionary? AdditionalProperties { get; set; } + + /// + /// Gets or sets the list of the conversation items included in the response. + /// + public IList? Items { get; set; } + + /// + /// Gets or sets the output modalities for the response, such as "text" or "audio". + /// If null, the default conversation modalities will be used. + /// + public IList? OutputModalities { get; set; } + + /// + /// Gets or sets the status of the response. + /// + /// + /// Typically set on messages to indicate + /// how the response ended. See for well-known values + /// such as , + /// (e.g., due to user barge-in), , + /// and . + /// + public string? Status { get; set; } + + /// + /// Gets or sets the error content of the response, if any. + /// + public ErrorContent? Error { get; set; } + + /// + /// Gets or sets the per-response token usage for billing purposes. + /// + /// + /// Populated when the response is complete (i.e., on ). + /// Input tokens include the entire conversation context, so they grow over successive turns + /// as previous output becomes input for later responses. + /// + public UsageDetails? 
Usage { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ResponseOutputItemRealtimeServerMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ResponseOutputItemRealtimeServerMessage.cs new file mode 100644 index 00000000000..bd2d5ecbafb --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/ResponseOutputItemRealtimeServerMessage.cs @@ -0,0 +1,54 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a real-time message representing a new output item added or created during response generation. +/// +/// +/// +/// Used with the and messages. +/// +/// +/// Provider implementations should emit this message with +/// when an output item (such as a function call or text message) has completed. The built-in +/// middleware depends on this message to detect +/// and invoke tool calls. +/// +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class ResponseOutputItemRealtimeServerMessage : RealtimeServerMessage +{ + /// + /// Initializes a new instance of the class. + /// + /// + /// The should be or . + /// + public ResponseOutputItemRealtimeServerMessage(RealtimeServerMessageType type) + { + Type = type; + } + + /// + /// Gets or sets the unique response ID. + /// + /// + /// May be for providers that do not natively track response lifecycle. + /// + public string? ResponseId { get; set; } + + /// + /// Gets or sets the unique output index. + /// + public int? OutputIndex { get; set; } + + /// + /// Gets or sets the conversation item included in the response. + /// + public RealtimeConversationItem? 
Item { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/SessionUpdateRealtimeClientMessage.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/SessionUpdateRealtimeClientMessage.cs new file mode 100644 index 00000000000..a2c1bbb614f --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Realtime/SessionUpdateRealtimeClientMessage.cs @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a client message that requests updating the session configuration. +/// +/// +/// +/// Sending this message requests that the provider update the active session with new options. +/// Not all providers support mid-session updates. Providers that do not support this message +/// may ignore it or throw a . +/// +/// +/// When a provider processes this message, it should update its +/// property to reflect the new configuration. +/// +/// +[Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] +public class SessionUpdateRealtimeClientMessage : RealtimeClientMessage +{ + /// + /// Initializes a new instance of the class. + /// + /// The session options to apply. + public SessionUpdateRealtimeClientMessage(RealtimeSessionOptions options) + { + Options = Throw.IfNull(options); + } + + /// + /// Gets or sets the session options to apply. 
+ /// + public RealtimeSessionOptions Options { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs index 24021577803..e8fc8517ab1 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs @@ -38,7 +38,7 @@ public SpeechToTextClientMetadata(string? providerName = null, Uri? providerUri /// Gets the ID of the default model used by this speech to text client. /// /// This value can be null if either the name is unknown or there are multiple possible models associated with this instance. - /// An individual request may override this value via . + /// An individual request may override this value via . /// public string? DefaultModelId { get; } } diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs index 856442fbad3..aacd4259db4 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs @@ -26,9 +26,8 @@ protected SpeechToTextOptions(SpeechToTextOptions? other) } AdditionalProperties = other.AdditionalProperties?.Clone(); - ModelId = other.ModelId; + Transcription = other.Transcription; RawRepresentationFactory = other.RawRepresentationFactory; - SpeechLanguage = other.SpeechLanguage; SpeechSampleRate = other.SpeechSampleRate; TextLanguage = other.TextLanguage; } @@ -36,11 +35,8 @@ protected SpeechToTextOptions(SpeechToTextOptions? other) /// Gets or sets any additional properties associated with the options. public AdditionalPropertiesDictionary? 
AdditionalProperties { get; set; } - /// Gets or sets the model ID for the speech to text. - public string? ModelId { get; set; } - - /// Gets or sets the language of source speech. - public string? SpeechLanguage { get; set; } + /// Gets or sets the transcription options for the speech to text request. + public TranscriptionOptions? Transcription { get; set; } /// Gets or sets the sample rate of the speech input audio. public int? SpeechSampleRate { get; set; } diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/TranscriptionOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/TranscriptionOptions.cs new file mode 100644 index 00000000000..876c71c2aca --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/TranscriptionOptions.cs @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents options for configuring transcription. +/// +[Experimental(DiagnosticIds.Experiments.AISpeechToText, UrlFormat = DiagnosticIds.UrlFormat)] +public class TranscriptionOptions +{ + /// + /// Initializes a new instance of the class. + /// + public TranscriptionOptions() + { + } + + /// + /// Gets or sets the language of the input speech audio. + /// + /// + /// The language should be specified in ISO-639-1 format (e.g. "en"). + /// Supplying the input speech language improves transcription accuracy and latency. + /// + public string? SpeechLanguage { get; set; } + + /// + /// Gets or sets the model ID to use for transcription. + /// + public string? ModelId { get; set; } + + /// + /// Gets or sets an optional prompt to guide the transcription. + /// + public string? 
Prompt { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/UsageDetails.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/UsageDetails.cs index b3edbad5e99..4af1aa83b6a 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/UsageDetails.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/UsageDetails.cs @@ -1,9 +1,12 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System; using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json.Serialization; +using Microsoft.Shared.DiagnosticIds; using Microsoft.Shared.Diagnostics; namespace Microsoft.Extensions.AI; @@ -38,6 +41,38 @@ public class UsageDetails /// public long? ReasoningTokenCount { get; set; } + /// Gets or sets the number of audio input tokens used. + /// + /// Audio input tokens should be counted as part of . + /// + [Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + [JsonIgnore] + public long? InputAudioTokenCount { get; set; } + + /// Gets or sets the number of text input tokens used. + /// + /// Text input tokens should be counted as part of . + /// + [Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + [JsonIgnore] + public long? InputTextTokenCount { get; set; } + + /// Gets or sets the number of audio output tokens used. + /// + /// Audio output tokens should be counted as part of . + /// + [Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + [JsonIgnore] + public long? OutputAudioTokenCount { get; set; } + + /// Gets or sets the number of text output tokens used. + /// + /// Text output tokens should be counted as part of . 
+ /// + [Experimental(DiagnosticIds.Experiments.AIRealTime, UrlFormat = DiagnosticIds.UrlFormat)] + [JsonIgnore] + public long? OutputTextTokenCount { get; set; } + /// Gets or sets a dictionary of additional usage counts. /// /// All values set here are assumed to be summable. For example, when middleware makes multiple calls to an underlying @@ -57,6 +92,10 @@ public void Add(UsageDetails usage) TotalTokenCount = NullableSum(TotalTokenCount, usage.TotalTokenCount); CachedInputTokenCount = NullableSum(CachedInputTokenCount, usage.CachedInputTokenCount); ReasoningTokenCount = NullableSum(ReasoningTokenCount, usage.ReasoningTokenCount); + InputAudioTokenCount = NullableSum(InputAudioTokenCount, usage.InputAudioTokenCount); + InputTextTokenCount = NullableSum(InputTextTokenCount, usage.InputTextTokenCount); + OutputAudioTokenCount = NullableSum(OutputAudioTokenCount, usage.OutputAudioTokenCount); + OutputTextTokenCount = NullableSum(OutputTextTokenCount, usage.OutputTextTokenCount); if (usage.AdditionalCounts is { } countsToAdd) { @@ -109,6 +148,25 @@ internal string DebuggerDisplay parts.Add($"{nameof(ReasoningTokenCount)} = {reasoning}"); } + if (InputAudioTokenCount is { } inputAudio) + { + parts.Add($"{nameof(InputAudioTokenCount)} = {inputAudio}"); + } + + if (InputTextTokenCount is { } inputText) + { + parts.Add($"{nameof(InputTextTokenCount)} = {inputText}"); + } + + if (OutputAudioTokenCount is { } outputAudio) + { + parts.Add($"{nameof(OutputAudioTokenCount)} = {outputAudio}"); + } + + if (OutputTextTokenCount is { } outputText) + { + parts.Add($"{nameof(OutputTextTokenCount)} = {outputText}"); + } if (AdditionalCounts is { } additionalCounts) { foreach (var entry in additionalCounts) diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Microsoft.Extensions.AI.Evaluation.Reporting.csproj b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Microsoft.Extensions.AI.Evaluation.Reporting.csproj index 
8ee31bc2b1a..8a960fc4df1 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Microsoft.Extensions.AI.Evaluation.Reporting.csproj +++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Microsoft.Extensions.AI.Evaluation.Reporting.csproj @@ -1,6 +1,6 @@  -