Skip to content

Commit dd51ed3

Browse files
authored
Merge pull request #225 from AzureCosmosDB/copilot/fix-nested-elements-json-sink
Fix Dictionary serialization in JSON sink for MongoDB nested documents
2 parents 6b20d7a + e0b5427 commit dd51ed3

File tree

3 files changed

+152
-0
lines changed

3 files changed

+152
-0
lines changed

Extensions/Json/Cosmos.DataTransfer.JsonExtension.UnitTests/JsonFileSinkTests.cs

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
using Cosmos.DataTransfer.Common.UnitTests;
33
using Microsoft.Extensions.Logging.Abstractions;
44
using Newtonsoft.Json;
5+
using Newtonsoft.Json.Linq;
56

67
namespace Cosmos.DataTransfer.JsonExtension.UnitTests
78
{
@@ -45,5 +46,68 @@ public async Task WriteAsync_WithFlatObjects_WritesToValidFile()
4546
Assert.IsTrue(outputData.Any(o => o.Id == 2 && o.Name == "Two"));
4647
Assert.IsTrue(outputData.Any(o => o.Id == 3 && o.Name == "Three"));
4748
}
49+
50+
[TestMethod]
51+
public async Task WriteAsync_WithNestedDictionaries_SerializesCorrectly()
52+
{
53+
// Regression test for the MongoDB nested elements issue: a data item whose values are raw dictionaries and a list of dictionaries is written through the JSON file sink, then the file is parsed back to check the nesting.
54+
var sink = new JsonFileSink();
55+
56+
var data = new List<DictionaryDataItem>
57+
{
58+
new(new Dictionary<string, object?>
59+
{
60+
{ "_id", new Dictionary<string, object?> { { "$oid", "some_id" } } },
61+
{ "thread_id", "thread_id" },
62+
{ "content", new List<Dictionary<string, object?>>
63+
{
64+
new Dictionary<string, object?>
65+
{
66+
{ "text", "a message text" },
67+
{ "type", "text" }
68+
}
69+
}
70+
},
71+
{ "role", "user" }
72+
})
73+
};
74+
75+
string outputFile = $"{DateTime.Now:yy-MM-dd}_FS_Nested_Output.json";
76+
var config = TestHelpers.CreateConfig(new Dictionary<string, string>
77+
{
78+
{ "FilePath", outputFile }
79+
});
80+
81+
await sink.WriteAsync(data.ToAsyncEnumerable(), config, new JsonFileSource(), NullLogger.Instance);
82+
83+
var jsonContent = await File.ReadAllTextAsync(outputFile);
84+
var outputArray = JArray.Parse(jsonContent);
85+
86+
Assert.AreEqual(1, outputArray.Count);
87+
88+
var doc = outputArray[0] as JObject;
89+
Assert.IsNotNull(doc);
90+
91+
// Verify _id was written as a JSON object that carries the $oid field
92+
var idObj = doc["_id"] as JObject;
93+
Assert.IsNotNull(idObj, "_id should be an object");
94+
Assert.AreEqual("some_id", idObj["$oid"]?.ToString());
95+
96+
// Verify thread_id has the expected string value
97+
Assert.AreEqual("thread_id", doc["thread_id"]?.ToString());
98+
99+
// Verify content was written as a JSON array holding exactly one nested object
100+
var contentArray = doc["content"] as JArray;
101+
Assert.IsNotNull(contentArray, "content should be an array");
102+
Assert.AreEqual(1, contentArray.Count);
103+
104+
var contentItem = contentArray[0] as JObject;
105+
Assert.IsNotNull(contentItem, "content item should be an object");
106+
Assert.AreEqual("a message text", contentItem["text"]?.ToString());
107+
Assert.AreEqual("text", contentItem["type"]?.ToString());
108+
109+
// Verify role has the expected string value
110+
Assert.AreEqual("user", doc["role"]?.ToString());
111+
}
48112
}
49113
}

Interfaces/Cosmos.DataTransfer.Common.UnitTests/DataItemJsonConverterTests.cs

Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,5 +194,81 @@ public void Test_AsJsonString(bool includeNullFields) {
194194
var json = DataItemJsonConverter.AsJsonString(obj, false, includeNullFields);
195195
Assert.AreEqual(expected, json);
196196
}
197+
198+
[TestMethod]
199+
[DataRow(false)]
200+
[DataRow(true)]
201+
public void Test_WriteFieldValue_DictionaryAsNestedObject(bool includeNullFields)
202+
{
203+
// A Dictionary<string, object?> field value must be written as a nested JSON object; the null-valued "NULL" entry is expected in the output only when includeNullFields is true.
204+
var nestedDict = new Dictionary<string, object?>
205+
{
206+
{ "text", "a message text" },
207+
{ "type", "text" },
208+
{ "NULL", null }
209+
};
210+
211+
var expected = "\"x\":{\"text\":\"a message text\",\"type\":\"text\",\"NULL\":null}";
212+
if (!includeNullFields)
213+
{
214+
expected = expected.Replace(",\"NULL\":null", "");
215+
}
216+
217+
var (writer, readFunc) = CreateUtf8JsonWriter();
218+
DataItemJsonConverter.WriteFieldValue(writer, "x", nestedDict, includeNullFields: includeNullFields);
219+
Assert.AreEqual(expected, readFunc(), $"includeNullFields: {includeNullFields}");
220+
}
221+
222+
[TestMethod]
223+
[DataRow(false)]
224+
[DataRow(true)]
225+
public void Test_WriteFieldValue_ArrayOfDictionaries(bool includeNullFields)
226+
{
227+
// A list of dictionaries (simulating the MongoDB nested array scenario) must be written as a JSON array of objects; no entry is null, so the same output is expected for both includeNullFields values.
228+
var arrayOfDicts = new List<Dictionary<string, object?>>
229+
{
230+
new Dictionary<string, object?>
231+
{
232+
{ "text", "a message text" },
233+
{ "type", "text" }
234+
},
235+
new Dictionary<string, object?>
236+
{
237+
{ "text", "another message" },
238+
{ "type", "text" }
239+
}
240+
};
241+
242+
var expected = "\"x\":[{\"text\":\"a message text\",\"type\":\"text\"},{\"text\":\"another message\",\"type\":\"text\"}]";
243+
244+
var (writer, readFunc) = CreateUtf8JsonWriter();
245+
DataItemJsonConverter.WriteFieldValue(writer, "x", arrayOfDicts, includeNullFields: includeNullFields);
246+
Assert.AreEqual(expected, readFunc(), $"includeNullFields: {includeNullFields}");
247+
}
248+
249+
[TestMethod]
250+
public void Test_AsJsonString_CompleteMongoScenario()
251+
{
252+
// Full document shape from the issue: a nested _id dictionary plus a content list of dictionaries must serialize to the exact JSON string asserted below.
253+
var mongoStyleDoc = new DictionaryDataItem(new Dictionary<string, object?>
254+
{
255+
{ "_id", new Dictionary<string, object?> { { "$oid", "some_id" } } },
256+
{ "thread_id", "thread_id" },
257+
{ "content", new List<Dictionary<string, object?>>
258+
{
259+
new Dictionary<string, object?>
260+
{
261+
{ "text", "a message text" },
262+
{ "type", "text" }
263+
}
264+
}
265+
},
266+
{ "role", "user" }
267+
});
268+
269+
var expected = "{\"_id\":{\"$oid\":\"some_id\"},\"thread_id\":\"thread_id\",\"content\":[{\"text\":\"a message text\",\"type\":\"text\"}],\"role\":\"user\"}";
270+
var json = DataItemJsonConverter.AsJsonString(mongoStyleDoc, false, false);
271+
Assert.AreEqual(expected, json);
272+
}
197273
}
198274

Interfaces/Cosmos.DataTransfer.Common/DataItemJsonConverter.cs

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,12 @@ internal static void WriteFieldValue(Utf8JsonWriter writer, string fieldName, ob
104104
{
105105
WriteDataItem(writer, child, includeNullFields, propertyName);
106106
}
107+
else if (fieldValue is IDictionary<string, object?> dict)
108+
{
109+
// Handle dictionaries (e.g., from MongoDB BsonDocument conversion) as nested objects
110+
var dictItem = new DictionaryDataItem(dict);
111+
WriteDataItem(writer, dictItem, includeNullFields, propertyName);
112+
}
107113
else if (fieldValue is not string && fieldValue is IEnumerable children)
108114
{
109115
writer.WriteStartArray(propertyName);
@@ -113,6 +119,12 @@ internal static void WriteFieldValue(Utf8JsonWriter writer, string fieldName, ob
113119
{
114120
WriteDataItem(writer, arrayChild, includeNullFields);
115121
}
122+
else if (arrayItem is IDictionary<string, object?> arrayDict)
123+
{
124+
// Handle dictionaries (e.g., from MongoDB BsonDocument conversion) as nested objects
125+
var arrayDictItem = new DictionaryDataItem(arrayDict);
126+
WriteDataItem(writer, arrayDictItem, includeNullFields);
127+
}
116128
else if (TryGetLong(arrayItem, out var longValue))
117129
{
118130
writer.WriteNumberValue(longValue);

0 commit comments

Comments
 (0)