diff --git a/js/ai/src/document.ts b/js/ai/src/document.ts index 67a03fd3ca..4f10cafba5 100644 --- a/js/ai/src/document.ts +++ b/js/ai/src/document.ts @@ -46,6 +46,13 @@ export class Document implements DocumentData { this.metadata = deepCopy(data.metadata); } + static fromParts(content: Part[], metadata?: Record) { + return new Document({ + content, + metadata, + }); + } + static fromText(text: string, metadata?: Record) { return new Document({ content: [{ text }], diff --git a/js/plugins/google-genai/src/googleai/embedder.ts b/js/plugins/google-genai/src/googleai/embedder.ts index 5ce609fc48..6c48cb662b 100644 --- a/js/plugins/google-genai/src/googleai/embedder.ts +++ b/js/plugins/google-genai/src/googleai/embedder.ts @@ -24,6 +24,7 @@ import { } from 'genkit'; import { embedderRef } from 'genkit/embedder'; import { embedder as pluginEmbedder } from 'genkit/plugin'; +import { toGeminiMessage } from '../common/converters.js'; import { embedContent } from './client.js'; import { ClientOptions, @@ -86,9 +87,19 @@ function commonRef( const GENERIC_MODEL = commonRef('embedder'); const KNOWN_MODELS = { + 'gemini-embedding-2-preview': commonRef('gemini-embedding-2-preview', { + dimensions: 3072, + supports: { + input: ['text', 'image', 'video'], + }, + }), 'gemini-embedding-001': commonRef('gemini-embedding-001'), -}; +} as const; export type KnownModels = keyof typeof KNOWN_MODELS; // For autocomplete +export type EmbedderModelName = `gemini-embedding-${string}`; +export function isEmbedderName(value: string): value is EmbedderModelName { + return value.startsWith('gemini-embedding-'); +} export function model( version: string, @@ -98,8 +109,9 @@ export function model( return embedderRef({ name: `googleai/${name}`, config, - configSchema: GENERIC_MODEL.configSchema, - info: { + configSchema: + KNOWN_MODELS[name]?.configSchema ?? GENERIC_MODEL.configSchema, + info: KNOWN_MODELS[name]?.info ?? { ...GENERIC_MODEL.info, }, }); @@ -159,10 +171,7 @@ export function defineEmbedder( { taskType: request.options?.taskType, title: request.options?.title, - content: { - role: '', - parts: [{ text: doc.text }], - }, + content: toGeminiMessage({ role: 'user', content: doc.content }), outputDimensionality: request.options?.outputDimensionality, } as EmbedContentRequest, clientOptions diff --git a/js/plugins/google-genai/src/googleai/index.ts b/js/plugins/google-genai/src/googleai/index.ts index b107c04c12..f08f25cc09 100644 --- a/js/plugins/google-genai/src/googleai/index.ts +++ b/js/plugins/google-genai/src/googleai/index.ts @@ -164,7 +164,7 @@ export type GoogleAIPlugin = { model(name: string, config?: any): ModelReference; embedder( - name: string, + name: embedder.KnownModels | (embedder.EmbedderModelName & {}), config?: embedder.EmbeddingConfig ): EmbedderReference; }; diff --git a/js/plugins/google-genai/src/vertexai/embedder.ts b/js/plugins/google-genai/src/vertexai/embedder.ts index cbf5025284..ab55be3fb2 100644 --- a/js/plugins/google-genai/src/vertexai/embedder.ts +++ b/js/plugins/google-genai/src/vertexai/embedder.ts @@ -108,6 +108,11 @@ export const KNOWN_MODELS = { supports: { input: ['text'] }, }), } as const; +export type KnownModels = keyof typeof KNOWN_MODELS; // For autocomplete +export type EmbedderModelName = `${string}embedding${string}`; +export function isEmbedderName(value: string): value is EmbedderModelName { + return value.includes('embedding'); +} export function model( version: string, diff --git a/js/plugins/google-genai/src/vertexai/index.ts b/js/plugins/google-genai/src/vertexai/index.ts index 488d4a94f7..7a0daa3325 100644 --- a/js/plugins/google-genai/src/vertexai/index.ts +++ b/js/plugins/google-genai/src/vertexai/index.ts @@ -146,7 +146,7 @@ export type VertexAIPlugin = { model(name: string, config?: any): ModelReference; embedder( - name: string, + name: embedder.KnownModels | (embedder.EmbedderModelName & {}), config?: embedder.EmbeddingConfig ): EmbedderReference; }; diff --git a/js/plugins/google-genai/tests/googleai/embedder_test.ts b/js/plugins/google-genai/tests/googleai/embedder_test.ts index f9979a3227..744a157f1e 100644 --- a/js/plugins/google-genai/tests/googleai/embedder_test.ts +++ b/js/plugins/google-genai/tests/googleai/embedder_test.ts @@ -236,7 +236,7 @@ describe('defineGoogleAIEmbedder', () => { const fetchArgs1 = fetchStub.firstCall.args; assert.strictEqual(fetchArgs1[0], expectedUrl); const expectedRequest1 = { - content: { role: '', parts: [{ text: 'Hello' }] }, + content: { role: 'user', parts: [{ text: 'Hello' }] }, }; assert.deepStrictEqual(JSON.parse(fetchArgs1[1].body), expectedRequest1); @@ -244,7 +244,7 @@ describe('defineGoogleAIEmbedder', () => { const fetchArgs2 = fetchStub.secondCall.args; assert.strictEqual(fetchArgs2[0], expectedUrl); const expectedRequest2 = { - content: { role: '', parts: [{ text: 'World' }] }, + content: { role: 'user', parts: [{ text: 'World' }] }, }; assert.deepStrictEqual(JSON.parse(fetchArgs2[1].body), expectedRequest2); @@ -253,6 +253,47 @@ describe('defineGoogleAIEmbedder', () => { }); }); + it('calls embedContent with multimodal input', async () => { + const embedder = defineEmbedder( + 'gemini-embedding-2-preview', + defaultPluginOptions + ); + mockFetchResponse({ embedding: { values: [0.5, 0.6] } }); + + const doc = new Document({ + content: [ + { text: 'Look at this' }, + { + media: { + url: 'data:image/jpeg;base64,12345', + contentType: 'image/jpeg', + }, + }, + ], + }); + const result = await embedder.run({ input: [doc] }); + + sinon.assert.calledOnce(fetchStub); + const fetchArgs = fetchStub.firstCall.args; + const expectedUrl = + 'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-2-preview:embedContent'; + assert.strictEqual(fetchArgs[0], expectedUrl); + + const expectedRequest = { + content: { + role: 'user', + parts: [ + { text: 'Look at this' }, + { inlineData: { mimeType: 'image/jpeg', data: '12345' } }, + ], + }, + }; + assert.deepStrictEqual(JSON.parse(fetchArgs[1].body), expectedRequest); + assert.deepStrictEqual(result.result, { + embeddings: [{ embedding: [0.5, 0.6] }], + }); + }); + it('calls embedContent with taskType, title, and outputDimensionality options', async () => { const embedder = defineEmbedder( 'gemini-embedding-001', @@ -275,7 +316,7 @@ describe('defineGoogleAIEmbedder', () => { assert.strictEqual(body.title, 'Doc Title'); assert.strictEqual(body.outputDimensionality, 256); assert.deepStrictEqual(body.content, { - role: '', + role: 'user', parts: [{ text: 'Hello' }], }); }); diff --git a/js/testapps/basic-gemini/src/index.ts b/js/testapps/basic-gemini/src/index.ts index eaf19a100c..937570b08e 100644 --- a/js/testapps/basic-gemini/src/index.ts +++ b/js/testapps/basic-gemini/src/index.ts @@ -17,6 +17,7 @@ import { googleAI } from '@genkit-ai/google-genai'; import * as fs from 'fs'; import { + Document, genkit, z, type MediaPart, @@ -796,6 +797,44 @@ ai.defineFlow( } ); +// Embed text +ai.defineFlow('embed-text', async () => { + const embeddings = await ai.embed({ + embedder: googleAI.embedder('gemini-embedding-001'), + content: 'Albert Einstein was a German-born theoretical physicist.', + options: { + outputDimensionality: 256, + taskType: 'RETRIEVAL_DOCUMENT', + title: 'Albert Einstein', // Valid when taskType is RETRIEVAL_DOCUMENT + }, + }); + + return embeddings; +}); + +// Embed multimodal content +ai.defineFlow('embed-multimodal', async () => { + const photoBase64 = fs.readFileSync('photo.jpg', { encoding: 'base64' }); + + const embeddings = await ai.embed({ + embedder: googleAI.embedder('gemini-embedding-2-preview'), + content: Document.fromParts([ + { text: 'A picture of Albert Einstein.' }, + { + media: { + contentType: 'image/jpeg', + url: `data:image/jpeg;base64,${photoBase64}`, + }, + }, + ]), + options: { + outputDimensionality: 256, + }, + }); + + return embeddings; +}); + // Deep research example ai.defineFlow('deep-research', async (_, { sendChunk }) => { let { operation } = await ai.generate({ diff --git a/js/testapps/flow-simple-ai/src/index.ts b/js/testapps/flow-simple-ai/src/index.ts index 99ac41ba4e..56181bd97e 100644 --- a/js/testapps/flow-simple-ai/src/index.ts +++ b/js/testapps/flow-simple-ai/src/index.ts @@ -945,13 +945,13 @@ ai.defineFlow('embedders-tester', async () => { console.log( await ai.embed({ content: 'hello world', - embedder: googleAI.embedder('text-embedding-004'), + embedder: googleAI.embedder('gemini-embedding-001'), }) ); console.log( await ai.embed({ content: 'hello world', - embedder: vertexAI.embedder('text-embedding-004'), + embedder: vertexAI.embedder('gemini-embedding-001'), }) ); }); diff --git a/js/testapps/rag/src/pdf-rag-firebase.ts b/js/testapps/rag/src/pdf-rag-firebase.ts index 0282e7550a..188096c2e4 100644 --- a/js/testapps/rag/src/pdf-rag-firebase.ts +++ b/js/testapps/rag/src/pdf-rag-firebase.ts @@ -160,7 +160,7 @@ const indexConfig = { collection: 'pdf-qa', contentField: 'facts', vectorField: 'embedding', - embedder: googleAI.embedder('text-embedding-004'), + embedder: googleAI.embedder('gemini-embedding-001'), }; const chunkingConfig = { diff --git a/samples/js-menu/src/genkit.ts b/samples/js-menu/src/genkit.ts index 0d51b186ba..765b2ffee3 100644 --- a/samples/js-menu/src/genkit.ts +++ b/samples/js-menu/src/genkit.ts @@ -26,7 +26,7 @@ export const ai = genkit({ devLocalVectorstore([ { indexName: 'menu-items', - embedder: googleAI.embedder('text-embedding-004'), + embedder: googleAI.embedder('gemini-embedding-001'), embedderOptions: { taskType: 'RETRIEVAL_DOCUMENT' }, }, ]),