From 8f38aa969bf85d50abb8bc40e582c189a51759b3 Mon Sep 17 00:00:00 2001
From: salimlaimeche
Date: Sat, 26 Jul 2025 14:44:51 +0200
Subject: [PATCH] fix: remove vector fields from search responses

Search and document lookups returned the stored vector arrays along with
the regular fields. Strip vector fields from results before returning
them, without mutating the original result objects.

---
Reviewer note: this patch was reflowed from a copy whose line breaks,
indentation and angle-bracket generics had been lost. The whitespace of
the context lines, the two restored generics (Map<...>, Partial<...>) and
the blob hashes on the "index" lines are best-effort and should be
regenerated with `git format-patch` before the patch is applied.

 azure-ai-search/lib/azure-search-client.ts |  91 ++++++++++++-
 azure-ai-search/package.json               |   2 +-
 azure-ai-search/server.ts                  |   2 +-
 azure-ai-search/test-vector-filtering.js   | 161 +++++++++++++++++++++
 4 files changed, 249 insertions(+), 7 deletions(-)
 create mode 100644 azure-ai-search/test-vector-filtering.js

diff --git a/azure-ai-search/lib/azure-search-client.ts b/azure-ai-search/lib/azure-search-client.ts
index 7828c8d..a539d9f 100644
--- a/azure-ai-search/lib/azure-search-client.ts
+++ b/azure-ai-search/lib/azure-search-client.ts
@@ -21,6 +21,72 @@ export class AzureSearchTools {
   private searchClients: Map<string, SearchClient<any>> = new Map();
   private config: AzureSearchConfig;
 
+  /**
+   * Returns copies of the given search results with vector fields removed, so
+   * that large vector arrays are not returned to the caller. The input results
+   * and their nested documents are left untouched.
+   * @param results Array of search results
+   * @returns New array of results without vector fields
+   */
+  private removeVectorFields(results: any[]): any[] {
+    return results.map(result => this.filterVectorFieldsRecursive(result));
+  }
+
+  /**
+   * Builds a filtered copy of a value. Plain objects and arrays are copied
+   * recursively with vector fields left out; any other value (primitives, Date
+   * and other class instances) is returned as-is.
+   * @param value Value to filter
+   * @returns Filtered copy of the value
+   */
+  private filterVectorFieldsRecursive(value: any): any {
+    // An array that reaches this point was not classified as a vector, but its
+    // items may be objects that carry vector fields of their own.
+    if (Array.isArray(value)) {
+      return value.map(item => this.filterVectorFieldsRecursive(item));
+    }
+
+    if (!value || typeof value !== 'object') return value;
+
+    // Only plain objects are rebuilt; copying a Date or another class instance
+    // key by key would lose its prototype.
+    const prototype = Object.getPrototypeOf(value);
+    if (prototype !== Object.prototype && prototype !== null) return value;
+
+    return Object.fromEntries(
+      Object.entries(value)
+        .filter(([key, fieldValue]) => !this.isVectorField(key, fieldValue))
+        .map(([key, fieldValue]) => [key, this.filterVectorFieldsRecursive(fieldValue)])
+    );
+  }
+
+  /**
+   * Determines if a field holds a vector that should be removed. Only array
+   * values qualify: either the field name mentions "vector" or "embedding"
+   * (case-insensitive), or the array is long and purely numeric. Non-array
+   * fields are always kept, whatever their name.
+   * @param key Field name
+   * @param value Field value
+   * @returns True if field should be removed
+   */
+  private isVectorField(key: string, value: any): boolean {
+    if (!Array.isArray(value)) return false;
+
+    // Check field name patterns; the lowercase match also covers suffixes such
+    // as "Vector", "_vector", "Embedding" and "_embedding".
+    const keyLower = key.toLowerCase();
+    if (keyLower.includes('vector') || keyLower.includes('embedding')) {
+      return true;
+    }
+
+    // Check if it's a large numeric array (likely a vector)
+    return (
+      value.length > 50 && // Lower threshold for safety
+      value.length < 10000 && // Reasonable upper bound for vectors
+      value.every((item: any) => typeof item === 'number')
+    );
+  }
+
   constructor(config?: Partial<AzureSearchConfig>) {
     this.config = {
       endpoint: config?.endpoint || process.env.AZURE_SEARCH_ENDPOINT || "",
@@ -102,10 +168,13 @@ export class AzureSearchTools {
         results.push(result);
       }
 
+      // Remove vector fields from results
+      const filteredResults = this.removeVectorFields(results);
+
       return {
         success: true,
         data: {
-          results,
+          results: filteredResults,
           count: response.count,
           facets: response.facets,
           coverage: response.coverage,
@@ -132,9 +201,12 @@ export class AzureSearchTools {
         selectedFields: params.select,
       });
 
+      // Remove vector fields from result
+      const filteredResult = this.removeVectorFields([result])[0];
+
       return {
         success: true,
-        data: result,
+        data: filteredResult,
       };
     } catch (error) {
       return {
@@ -502,10 +574,13 @@ export class AzureSearchTools {
         results.push(result);
       }
 
+      // Remove vector fields from results
+      const filteredResults = this.removeVectorFields(results);
+
       return {
         success: true,
         data: {
-          results,
+          results: filteredResults,
           count: response.count,
         },
       };
@@ -557,10 +632,13 @@ export class AzureSearchTools {
         results.push(result);
       }
 
+      // Remove vector fields from results
+      const filteredResults = this.removeVectorFields(results);
+
       return {
         success: true,
         data: {
-          results,
+          results: filteredResults,
           count: response.count,
           facets: response.facets,
           coverage: response.coverage,
@@ -635,10 +713,13 @@ export class AzureSearchTools {
         results.push(result);
       }
 
+      // Remove vector fields from results
+      const filteredResults = this.removeVectorFields(results);
+
       return {
         success: true,
         data: {
-          results,
+          results: filteredResults,
           count: response.count,
           answers: (response as any).answers?.map((answer: any) => ({
             key: answer.key || "",
diff --git a/azure-ai-search/package.json b/azure-ai-search/package.json
index faccc8e..8a0eb64 100644
--- a/azure-ai-search/package.json
+++ b/azure-ai-search/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@ignitionai/azure-ai-search-mcp",
-  "version": "1.0.1",
+  "version": "1.0.2",
   "description": "Complete Azure AI Search MCP server with vector search, semantic search, and document management",
   "type": "module",
   "bin": {
diff --git a/azure-ai-search/server.ts b/azure-ai-search/server.ts
index c73dba6..68d19f7 100644
--- a/azure-ai-search/server.ts
+++ b/azure-ai-search/server.ts
@@ -46,7 +46,7 @@ dotenv.config();
 // Create server instance
 const server = new McpServer({
   name: "AzureAISearchMCP",
-  version: "1.0.0",
+  version: "1.0.2",
   description: "MCP server for interacting with Azure AI Search"
 });
 
diff --git a/azure-ai-search/test-vector-filtering.js b/azure-ai-search/test-vector-filtering.js
new file mode 100644
index 0000000..9ac4683
--- /dev/null
+++ b/azure-ai-search/test-vector-filtering.js
@@ -0,0 +1,161 @@
+#!/usr/bin/env node
+
+/**
+ * Test script to demonstrate vector field filtering.
+ * The mock data simulates the result structure seen in the LangSmith trace.
+ *
+ * TestVectorFilter mirrors the private helpers of AzureSearchTools in
+ * lib/azure-search-client.ts; keep the two in sync.
+ *
+ * The process exits with a non-zero status when a check fails.
+ */
+
+class TestVectorFilter {
+  /**
+   * Determines if a field holds a vector that should be removed
+   */
+  isVectorField(key, value) {
+    if (!Array.isArray(value)) return false;
+
+    // Check field name patterns
+    const keyLower = key.toLowerCase();
+    if (keyLower.includes('vector') || keyLower.includes('embedding')) {
+      return true;
+    }
+
+    // Check if it's a large numeric array (likely a vector)
+    return (
+      value.length > 50 && // Lower threshold for safety
+      value.length < 10000 && // Reasonable upper bound for vectors
+      value.every((item) => typeof item === 'number')
+    );
+  }
+
+  /**
+   * Builds a filtered copy of a value without mutating the input
+   */
+  filterVectorFieldsRecursive(value) {
+    if (Array.isArray(value)) {
+      return value.map(item => this.filterVectorFieldsRecursive(item));
+    }
+
+    if (!value || typeof value !== 'object') return value;
+
+    const prototype = Object.getPrototypeOf(value);
+    if (prototype !== Object.prototype && prototype !== null) return value;
+
+    const keptEntries = [];
+    for (const [key, fieldValue] of Object.entries(value)) {
+      if (this.isVectorField(key, fieldValue)) {
+        console.log(`🗑️ REMOVING vector field: "${key}" (array of ${fieldValue.length} items)`);
+        continue;
+      }
+      keptEntries.push([key, this.filterVectorFieldsRecursive(fieldValue)]);
+    }
+    return Object.fromEntries(keptEntries);
+  }
+
+  /**
+   * Removes vector fields from search results
+   */
+  removeVectorFields(results) {
+    return results.map(result => this.filterVectorFieldsRecursive(result));
+  }
+}
+
+// Test data simulating the structure from the LangSmith trace
+const mockSearchResults = [
+  {
+    score: 15.204945,
+    document: {
+      chunk_id: "fc37433d5ae7_aHR0cHM6Ly9zdG9yYWdlcG9ydGZvbGlv",
+      parent_id: "aHR0cHM6Ly9zdG9yYWdlcG9ydGZvbGlv",
+      chunk: "alpha, beta et gamma) le 16 décembre 2014, Release Version 1.0.",
+      title: "galnet.fr_elite-dangerous-release-s1_.md",
+      header_1: "Elite Dangerous - Saison 1 RELEASE",
+      header_2: "Elite Dangerous, Saison 1",
+      header_3: "",
+      // This is the problematic field from the trace - huge vector array
+      text_vector: Array.from({length: 1536}, () => Math.random() * 2 - 1),
+      // Additional vector fields to test
+      embedding_field: Array.from({length: 768}, () => Math.random()),
+      content_Vector: Array.from({length: 512}, () => Math.random()),
+      search_embedding: Array.from({length: 256}, () => Math.random()),
+      // Vectors nested inside a collection of objects must be removed as well
+      sections: [
+        { heading: "Intro", section_vector: Array.from({length: 64}, () => Math.random()) }
+      ]
+    }
+  },
+  {
+    score: 15.084188,
+    document: {
+      chunk_id: "38fab6c7b272_aHR0cHM6Ly9zdG9yYWdlcG9ydGZvbGlv",
+      chunk: "Les bêtas dans Elite: Dangerous sont toujours des moments assez 'épiques'",
+      title: "galnet.fr_beta-elite-dangerous-faq_.md",
+      text_vector: Array.from({length: 1536}, () => Math.random() * 2 - 1),
+      // Test edge cases
+      small_array: [1, 2, 3], // Should NOT be removed (too small)
+      large_string_array: Array.from({length: 100}, (_, i) => `item${i}`), // Should NOT be removed (not numbers)
+      mixed_array: [1, "text", 3], // Should NOT be removed (mixed types)
+      embedding_model: "text-embedding-ada-002" // Should NOT be removed (not an array)
+    }
+  }
+];
+
+const failures = [];
+
+/** Logs one check and records its message when the condition is false */
+function check(condition, message) {
+  console.log(`${condition ? '✅' : '❌'} ${message}`);
+  if (!condition) failures.push(message);
+}
+
+console.log("🧪 TESTING VECTOR FIELD FILTERING");
+console.log("=".repeat(50));
+
+const filter = new TestVectorFilter();
+
+console.log("\n📊 BEFORE FILTERING:");
+console.log(`Result 1 - document keys: ${Object.keys(mockSearchResults[0].document).join(', ')}`);
+console.log(`Result 1 - text_vector length: ${mockSearchResults[0].document.text_vector.length}`);
+console.log(`Result 2 - document keys: ${Object.keys(mockSearchResults[1].document).join(', ')}`);
+
+console.log("\n🔄 APPLYING FILTER...");
+const filteredResults = filter.removeVectorFields(mockSearchResults);
+
+console.log("\n🎯 VERIFICATION:");
+const [firstDoc, secondDoc] = filteredResults.map(result => result.document);
+
+check(
+  Object.keys(firstDoc).join(',') === 'chunk_id,parent_id,chunk,title,header_1,header_2,header_3,sections',
+  "Result 1 keeps only its non-vector fields"
+);
+check(
+  Object.keys(firstDoc.sections[0]).join(',') === 'heading',
+  "Vector nested inside a collection item is removed"
+);
+check(
+  Object.keys(secondDoc).join(',') ===
+    'chunk_id,chunk,title,small_array,large_string_array,mixed_array,embedding_model',
+  "Result 2 keeps small, non-numeric and non-array fields"
+);
+check(filteredResults[0].score === 15.204945, "Top-level properties are preserved");
+check(
+  mockSearchResults[0].document.text_vector.length === 1536,
+  "Input results are not mutated"
+);
+
+console.log("\n📋 PRESERVED FIELDS:");
+filteredResults.forEach((result, index) => {
+  console.log(`Result ${index + 1}:`, Object.keys(result.document).join(', '));
+});
+
+if (failures.length > 0) {
+  console.log(`\n❌ FAILED: ${failures.length} check(s) failed`);
+  process.exitCode = 1;
+} else {
+  console.log("\n✅ SUCCESS: No vector fields found in filtered results!");
+}
+
+console.log("\n🎉 Test completed!");