Skip to content

Commit b03c3d0

Browse files
.Net: Support BinaryContent in Gemini Connector (#13140)
### Motivation and Context Resolves: #13131 ### Description The Gemini connector actually already supports arbitrary base64 PDF content by prioritising binary data in the `CreateGeminiPartFromImage` function in `GeminiRequest.cs`. I kept the current behaviour with the `ImageContent` and `AudioContent` but also added explicit support for the `BinaryContent` KernelContent type in the `GetGeminiPartFromKernelContent` function to bring it in line with how the other connectors work. I added the same chat completion unit tests as those found in the OpenAI Connector for BinaryContent. ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone (I think...) 😄 --------- Co-authored-by: SergeyMenshykh <[email protected]>
1 parent 1fc6a63 commit b03c3d0

File tree

7 files changed

+384
-0
lines changed

7 files changed

+384
-0
lines changed
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using Microsoft.SemanticKernel;
4+
using Microsoft.SemanticKernel.ChatCompletion;
5+
using Resources;
6+
7+
namespace ChatCompletion;
8+
9+
/// <summary>
/// This sample shows how to send a binary file, such as a PDF, to Google Gemini's chat completion,
/// either as raw bytes or as an inline Base64 data URI.
/// </summary>
public class Google_GeminiChatCompletionWithFile(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>Name of the embedded PDF resource that every sample sends to the model.</summary>
    private const string PdfResourceName = "employees.pdf";

    /// <summary>MIME type declared for the PDF payload.</summary>
    private const string PdfMimeType = "application/pdf";

    /// <summary>
    /// Sends the PDF to Google AI Gemini as raw bytes wrapped in a <see cref="BinaryContent"/>.
    /// </summary>
    [Fact]
    public async Task GoogleAIChatCompletionWithLocalFile()
    {
        Console.WriteLine("============= Google AI - Gemini Chat Completion With Local File =============");

        Kernel kernel = CreateGoogleAIKernel();

        var fileBytes = await EmbeddedResource.ReadAllAsync(PdfResourceName);

        var reply = await AskAboutFileAsync(kernel, new BinaryContent(fileBytes, PdfMimeType));

        Console.WriteLine(reply.Content);
    }

    /// <summary>
    /// Sends the PDF to Vertex AI Gemini as raw bytes wrapped in a <see cref="BinaryContent"/>.
    /// </summary>
    [Fact]
    public async Task VertexAIChatCompletionWithLocalFile()
    {
        Console.WriteLine("============= Vertex AI - Gemini Chat Completion With Local File =============");

        Kernel kernel = CreateVertexAIKernel();

        var fileBytes = await EmbeddedResource.ReadAllAsync(PdfResourceName);

        var reply = await AskAboutFileAsync(kernel, new BinaryContent(fileBytes, PdfMimeType));

        Console.WriteLine(reply.Content);
    }

    /// <summary>
    /// Sends the PDF to Google AI Gemini as a Base64 data URI.
    /// </summary>
    [Fact]
    public async Task GoogleAIChatCompletionWithBase64DataUri()
    {
        Console.WriteLine("============= Google AI - Gemini Chat Completion With Base64 Data Uri =============");

        Kernel kernel = CreateGoogleAIKernel();

        var dataUri = await ReadPdfAsDataUriAsync();

        // Google AI Gemini does not support arbitrary URIs, but a Base64 data URI can be converted
        // into InlineData with the correct mimeType.
        var reply = await AskAboutFileAsync(kernel, new BinaryContent(dataUri));

        Console.WriteLine(reply.Content);
    }

    /// <summary>
    /// Sends the PDF to Vertex AI Gemini as a Base64 data URI.
    /// </summary>
    [Fact]
    public async Task VertexAIChatCompletionWithBase64DataUri()
    {
        Console.WriteLine("============= Vertex AI - Gemini Chat Completion With Base64 Data Uri =============");

        Kernel kernel = CreateVertexAIKernel();

        var dataUri = await ReadPdfAsDataUriAsync();

        // Vertex AI API does not support URIs outside of inline Base64 or GCS buckets within the same project.
        // The bucket that stores the file must be in the same Google Cloud project that's sending the request.
        // You must always provide the mimeType via the metadata property. Example of a GCS reference:
        //   var content = new BinaryContent(new Uri("gs://generativeai-downloads/files/employees.pdf"));
        //   content.Metadata = new Dictionary<string, object?> { { "mimeType", "application/pdf" } };
        var reply = await AskAboutFileAsync(kernel, new BinaryContent(dataUri));

        Console.WriteLine(reply.Content);
    }

    /// <summary>
    /// Builds a kernel with the Google AI Gemini chat completion service, after asserting that the
    /// required test configuration values are present.
    /// </summary>
    private static Kernel CreateGoogleAIKernel()
    {
        Assert.NotNull(TestConfiguration.GoogleAI.ApiKey);
        Assert.NotNull(TestConfiguration.GoogleAI.Gemini.ModelId);

        return Kernel.CreateBuilder()
            .AddGoogleAIGeminiChatCompletion(TestConfiguration.GoogleAI.Gemini.ModelId, TestConfiguration.GoogleAI.ApiKey)
            .Build();
    }

    /// <summary>
    /// Builds a kernel with the Vertex AI Gemini chat completion service, after asserting that the
    /// required test configuration values are present.
    /// </summary>
    private static Kernel CreateVertexAIKernel()
    {
        Assert.NotNull(TestConfiguration.VertexAI.BearerKey);
        Assert.NotNull(TestConfiguration.VertexAI.Location);
        Assert.NotNull(TestConfiguration.VertexAI.ProjectId);
        Assert.NotNull(TestConfiguration.VertexAI.Gemini.ModelId);

        return Kernel.CreateBuilder()
            .AddVertexAIGeminiChatCompletion(
                modelId: TestConfiguration.VertexAI.Gemini.ModelId,
                bearerKey: TestConfiguration.VertexAI.BearerKey,
                location: TestConfiguration.VertexAI.Location,
                projectId: TestConfiguration.VertexAI.ProjectId)
            .Build();
    }

    /// <summary>
    /// Reads the embedded PDF and encodes it as a <c>data:application/pdf;base64,...</c> URI.
    /// </summary>
    private static async Task<string> ReadPdfAsDataUriAsync()
    {
        var fileBytes = await EmbeddedResource.ReadAllAsync(PdfResourceName);
        var fileBase64 = Convert.ToBase64String(fileBytes.ToArray());

        return $"data:{PdfMimeType};base64,{fileBase64}";
    }

    /// <summary>
    /// Asks the chat completion service registered in <paramref name="kernel"/> what the supplied file contains.
    /// </summary>
    /// <param name="kernel">Kernel that provides the <see cref="IChatCompletionService"/>.</param>
    /// <param name="file">File content attached to the user message.</param>
    /// <returns>The reply message produced by the chat completion service.</returns>
    private static async Task<ChatMessageContent> AskAboutFileAsync(Kernel kernel, BinaryContent file)
    {
        var chatHistory = new ChatHistory("You are a friendly assistant.");
        chatHistory.AddUserMessage(
        [
            new TextContent("What's in this file?"),
            file
        ]);

        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();

        return await chatCompletionService.GetChatMessageContentAsync(chatHistory);
    }
}

dotnet/samples/Concepts/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom
6464
- [Google_GeminiChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiChatCompletion.cs)
6565
- [Google_GeminiChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiChatCompletionStreaming.cs)
6666
- [Google_GeminiChatCompletionWithThinkingBudget](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiChatCompletionWithThinkingBudget.cs)
67+
- [Google_GeminiChatCompletionWithFile](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiChatCompletionWithFile.cs)
6768
- [Google_GeminiGetModelResult](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiGetModelResult.cs)
6869
- [Google_GeminiStructuredOutputs](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiStructuredOutputs.cs)
6970
- [Google_GeminiVision](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiVision.cs)

dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/GeminiRequestTests.cs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,33 @@ public void FromChatHistoryAudioAsAudioContentItReturnsWithChatHistory()
371371
.SequenceEqual(Convert.FromBase64String(c.Parts![0].InlineData!.InlineData))));
372372
}
373373

374+
[Fact]
public void FromChatHistoryPdfAsBinaryContentItReturnsWithChatHistory()
{
    // Arrange: one PDF referenced by URI and one PDF supplied as raw bytes.
    ReadOnlyMemory<byte> pdfBytes = new byte[] { 0x00, 0x01, 0x02, 0x03 };
    var pdfByReference = new BinaryContent(new Uri("https://example-file.com/file.pdf")) { MimeType = "application/pdf" };
    var pdfInline = new BinaryContent(pdfBytes, "application/pdf");

    ChatHistory chatHistory = [];
    chatHistory.AddUserMessage("user-message");
    chatHistory.AddAssistantMessage("assist-message");
    chatHistory.AddUserMessage(contentItems: [pdfByReference]);
    chatHistory.AddUserMessage(contentItems: [pdfInline]);
    var settings = new GeminiPromptExecutionSettings();

    // Act
    var geminiRequest = GeminiRequest.FromChatHistoryAndExecutionSettings(chatHistory, settings);

    // Assert: text messages map to text parts, the URI reference to FileData, the bytes to InlineData.
    Assert.Collection(geminiRequest.Contents,
        content => Assert.Equal(chatHistory[0].Content, content.Parts![0].Text),
        content => Assert.Equal(chatHistory[1].Content, content.Parts![0].Text),
        content =>
        {
            var expectedUri = chatHistory[2].Items.Cast<BinaryContent>().Single().Uri;
            Assert.Equal(expectedUri, content.Parts![0].FileData!.FileUri);
        },
        content =>
        {
            byte[] sentBytes = Convert.FromBase64String(content.Parts![0].InlineData!.InlineData);
            Assert.True(pdfBytes.ToArray().SequenceEqual(sentBytes));
        });
}
400+
374401
[Fact]
375402
public void FromChatHistoryUnsupportedContentItThrowsNotSupportedException()
376403
{

dotnet/src/Connectors/Connectors.Google.UnitTests/Services/GoogleAIGeminiChatCompletionServiceTests.cs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
using System.IO;
66
using System.Net.Http;
77
using System.Text;
8+
using System.Text.Json;
89
using System.Threading.Tasks;
10+
using Microsoft.SemanticKernel;
911
using Microsoft.SemanticKernel.ChatCompletion;
1012
using Microsoft.SemanticKernel.Connectors.Google;
1113
using Microsoft.SemanticKernel.Services;
@@ -144,6 +146,78 @@ public async Task RequestBodyIncludesThinkingConfigWhenSetAsync(int? thinkingBud
144146
}
145147
}
146148

149+
[Fact]
public async Task GetChatMessageContentsAsyncThrowsExceptionWithEmptyBinaryContentAsync()
{
    // Arrange: a user message whose only item is a BinaryContent with neither data nor URI.
    var service = new GoogleAIGeminiChatCompletionService("gemini-2.5-pro", "key");
    var emptyContent = new BinaryContent();

    var history = new ChatHistory();
    history.AddUserMessage([emptyContent]);

    // Act
    Task SendAsync() => service.GetChatMessageContentsAsync(history);

    // Assert
    await Assert.ThrowsAsync<InvalidOperationException>(SendAsync);
}
161+
162+
[Fact]
public async Task GetChatMessageContentsThrowsExceptionUriOnlyReferenceBinaryContentAsync()
{
    // Arrange: a BinaryContent that carries only a file:// URI reference and no inline data.
    var service = new GoogleAIGeminiChatCompletionService("gemini-2.5-pro", "key");
    var uriOnlyContent = new BinaryContent(new Uri("file://testfile.pdf"));

    var history = new ChatHistory();
    history.AddUserMessage([uriOnlyContent]);

    // Act
    Task SendAsync() => service.GetChatMessageContentsAsync(history);

    // Assert
    await Assert.ThrowsAsync<InvalidOperationException>(SendAsync);
}
174+
175+
/// <summary>
/// Verifies that a PDF supplied either as a Base64 data URI or as raw bytes is serialized
/// into the request body as an <c>inlineData</c> part with the expected MIME type and payload.
/// </summary>
/// <param name="useUriData">
/// <see langword="true"/> to build the content from a data URI; <see langword="false"/> to build it from bytes.
/// </param>
[Theory]
[InlineData(true)]
[InlineData(false)]
public async Task ItSendsBinaryContentCorrectlyAsync(bool useUriData)
{
    // Arrange
    var sut = new GoogleAIGeminiChatCompletionService("gemini-2.5-pro", "key", httpClient: this._httpClient);

    var mimeType = "application/pdf";
    var chatHistory = new ChatHistory();
    chatHistory.AddUserMessage([
        new TextContent("What's in this file?"),
        useUriData
            ? new BinaryContent($"data:{mimeType};base64,{PdfBase64Data}")
            : new BinaryContent(Convert.FromBase64String(PdfBase64Data), mimeType)
    ]);

    // Act
    await sut.GetChatMessageContentsAsync(chatHistory);

    // Assert
    // Check the captured bytes before decoding: Encoding.GetString never returns null,
    // so asserting on the decoded string would not detect a request that was never sent.
    var requestBytes = this._messageHandlerStub.RequestContent;
    Assert.NotNull(requestBytes);
    var optionsJson = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestBytes));

    var contents = optionsJson.GetProperty("contents");
    Assert.Equal(1, contents.GetArrayLength());

    var parts = contents[0].GetProperty("parts");
    Assert.Equal(2, parts.GetArrayLength());

    Assert.True(parts[0].TryGetProperty("text", out var prompt));
    Assert.Equal("What's in this file?", prompt.GetString());

    // Check for the file data
    Assert.True(parts[1].TryGetProperty("inlineData", out var inlineData));
    Assert.Equal(JsonValueKind.Object, inlineData.ValueKind);
    Assert.Equal(mimeType, inlineData.GetProperty("mimeType").GetString());
    Assert.Equal(PdfBase64Data, inlineData.GetProperty("data").GetString());
}
215+
216+
/// <summary>
217+
/// Base64-encoded sample PDF payload for testing (without the <c>data:</c> URI prefix).
218+
/// </summary>
219+
private const string PdfBase64Data = "JVBERi0xLjQKMSAwIG9iago8PC9UeXBlIC9DYXRhbG9nCi9QYWdlcyAyIDAgUgo+PgplbmRvYmoKMiAwIG9iago8PC9UeXBlIC9QYWdlcwovS2lkcyBbMyAwIFJdCi9Db3VudCAxCj4+CmVuZG9iagozIDAgb2JqCjw8L1R5cGUgL1BhZ2UKL1BhcmVudCAyIDAgUgovTWVkaWFCb3ggWzAgMCA1OTUgODQyXQovQ29udGVudHMgNSAwIFIKL1Jlc291cmNlcyA8PC9Qcm9jU2V0IFsvUERGIC9UZXh0XQovRm9udCA8PC9GMSA0IDAgUj4+Cj4+Cj4+CmVuZG9iago0IDAgb2JqCjw8L1R5cGUgL0ZvbnQKL1N1YnR5cGUgL1R5cGUxCi9OYW1lIC9GMQovQmFzZUZvbnQgL0hlbHZldGljYQovRW5jb2RpbmcgL01hY1JvbWFuRW5jb2RpbmcKPj4KZW5kb2JqCjUgMCBvYmoKPDwvTGVuZ3RoIDUzCj4+CnN0cmVhbQpCVAovRjEgMjAgVGYKMjIwIDQwMCBUZAooRHVtbXkgUERGKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCnhyZWYKMCA2CjAwMDAwMDAwMDAgNjU1MzUgZgowMDAwMDAwMDA5IDAwMDAwIG4KMDAwMDAwMDA2MyAwMDAwMCBuCjAwMDAwMDAxMjQgMDAwMDAgbgowMDAwMDAwMjc3IDAwMDAwIG4KMDAwMDAwMDM5MiAwMDAwMCBuCnRyYWlsZXIKPDwvU2l6ZSA2Ci9Sb290IDEgMCBSCj4+CnN0YXJ0eHJlZgo0OTUKJSVFT0YK";
220+
147221
public void Dispose()
148222
{
149223
this._httpClient.Dispose();

dotnet/src/Connectors/Connectors.Google.UnitTests/Services/VertexAIGeminiChatCompletionServiceTests.cs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
using System.IO;
66
using System.Net.Http;
77
using System.Text;
8+
using System.Text.Json;
89
using System.Threading.Tasks;
10+
using Microsoft.SemanticKernel;
911
using Microsoft.SemanticKernel.ChatCompletion;
1012
using Microsoft.SemanticKernel.Connectors.Google;
1113
using Microsoft.SemanticKernel.Services;
@@ -156,6 +158,78 @@ public async Task RequestBodyIncludesThinkingConfigWhenSetAsync(int? thinkingBud
156158
}
157159
}
158160

161+
[Fact]
public async Task GetChatMessageContentsAsyncThrowsExceptionWithEmptyBinaryContentAsync()
{
    // Arrange: a user message whose only item is a BinaryContent with neither data nor URI.
    var service = new VertexAIGeminiChatCompletionService("gemini-2.5-pro", "key", "location", "project");
    var emptyContent = new BinaryContent();

    var history = new ChatHistory();
    history.AddUserMessage([emptyContent]);

    // Act
    Task SendAsync() => service.GetChatMessageContentsAsync(history);

    // Assert
    await Assert.ThrowsAsync<InvalidOperationException>(SendAsync);
}
173+
174+
[Fact]
public async Task GetChatMessageContentsThrowsExceptionUriOnlyReferenceBinaryContentAsync()
{
    // Arrange: a BinaryContent that carries only a file:// URI reference and no inline data.
    var service = new VertexAIGeminiChatCompletionService("gemini-2.5-pro", "key", "location", "project");
    var uriOnlyContent = new BinaryContent(new Uri("file://testfile.pdf"));

    var history = new ChatHistory();
    history.AddUserMessage([uriOnlyContent]);

    // Act
    Task SendAsync() => service.GetChatMessageContentsAsync(history);

    // Assert
    await Assert.ThrowsAsync<InvalidOperationException>(SendAsync);
}
186+
187+
/// <summary>
/// Verifies that a PDF supplied either as a Base64 data URI or as raw bytes is serialized
/// into the request body as an <c>inlineData</c> part with the expected MIME type and payload.
/// </summary>
/// <param name="useUriData">
/// <see langword="true"/> to build the content from a data URI; <see langword="false"/> to build it from bytes.
/// </param>
[Theory]
[InlineData(true)]
[InlineData(false)]
public async Task ItSendsBinaryContentCorrectlyAsync(bool useUriData)
{
    // Arrange
    var sut = new VertexAIGeminiChatCompletionService("gemini-2.5-pro", "key", "location", "project", httpClient: this._httpClient);

    var mimeType = "application/pdf";
    var chatHistory = new ChatHistory();
    chatHistory.AddUserMessage([
        new TextContent("What's in this file?"),
        useUriData
            ? new BinaryContent($"data:{mimeType};base64,{PdfBase64Data}")
            : new BinaryContent(Convert.FromBase64String(PdfBase64Data), mimeType)
    ]);

    // Act
    await sut.GetChatMessageContentsAsync(chatHistory);

    // Assert
    // Check the captured bytes before decoding: Encoding.GetString never returns null,
    // so asserting on the decoded string would not detect a request that was never sent.
    var requestBytes = this._messageHandlerStub.RequestContent;
    Assert.NotNull(requestBytes);
    var optionsJson = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestBytes));

    var contents = optionsJson.GetProperty("contents");
    Assert.Equal(1, contents.GetArrayLength());

    var parts = contents[0].GetProperty("parts");
    Assert.Equal(2, parts.GetArrayLength());

    Assert.True(parts[0].TryGetProperty("text", out var prompt));
    Assert.Equal("What's in this file?", prompt.GetString());

    // Check for the file data
    Assert.True(parts[1].TryGetProperty("inlineData", out var inlineData));
    Assert.Equal(JsonValueKind.Object, inlineData.ValueKind);
    Assert.Equal(mimeType, inlineData.GetProperty("mimeType").GetString());
    Assert.Equal(PdfBase64Data, inlineData.GetProperty("data").GetString());
}
227+
228+
/// <summary>
229+
/// Base64-encoded sample PDF payload for testing (without the <c>data:</c> URI prefix).
230+
/// </summary>
231+
private const string PdfBase64Data = "JVBERi0xLjQKMSAwIG9iago8PC9UeXBlIC9DYXRhbG9nCi9QYWdlcyAyIDAgUgo+PgplbmRvYmoKMiAwIG9iago8PC9UeXBlIC9QYWdlcwovS2lkcyBbMyAwIFJdCi9Db3VudCAxCj4+CmVuZG9iagozIDAgb2JqCjw8L1R5cGUgL1BhZ2UKL1BhcmVudCAyIDAgUgovTWVkaWFCb3ggWzAgMCA1OTUgODQyXQovQ29udGVudHMgNSAwIFIKL1Jlc291cmNlcyA8PC9Qcm9jU2V0IFsvUERGIC9UZXh0XQovRm9udCA8PC9GMSA0IDAgUj4+Cj4+Cj4+CmVuZG9iago0IDAgb2JqCjw8L1R5cGUgL0ZvbnQKL1N1YnR5cGUgL1R5cGUxCi9OYW1lIC9GMQovQmFzZUZvbnQgL0hlbHZldGljYQovRW5jb2RpbmcgL01hY1JvbWFuRW5jb2RpbmcKPj4KZW5kb2JqCjUgMCBvYmoKPDwvTGVuZ3RoIDUzCj4+CnN0cmVhbQpCVAovRjEgMjAgVGYKMjIwIDQwMCBUZAooRHVtbXkgUERGKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCnhyZWYKMCA2CjAwMDAwMDAwMDAgNjU1MzUgZgowMDAwMDAwMDA5IDAwMDAwIG4KMDAwMDAwMDA2MyAwMDAwMCBuCjAwMDAwMDAxMjQgMDAwMDAgbgowMDAwMDAwMjc3IDAwMDAwIG4KMDAwMDAwMDM5MiAwMDAwMCBuCnRyYWlsZXIKPDwvU2l6ZSA2Ci9Sb290IDEgMCBSCj4+CnN0YXJ0eHJlZgo0OTUKJSVFT0YK";
232+
159233
public void Dispose()
160234
{
161235
this._httpClient.Dispose();

0 commit comments

Comments
 (0)