-
Notifications
You must be signed in to change notification settings - Fork 7
Add Vector Search plugin #200
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,178 @@ | ||
| import { Context, type WorkspaceClient } from "@databricks/sdk-experimental"; | ||
| import type { TelemetryOptions } from "shared"; | ||
| import { createLogger } from "../../logging/logger"; | ||
| import { | ||
| type Span, | ||
| SpanKind, | ||
| SpanStatusCode, | ||
| TelemetryManager, | ||
| } from "../../telemetry"; | ||
| import type { TelemetryProvider } from "../../telemetry"; | ||
| import type { | ||
| VectorSearchConnectorConfig, | ||
| VsNextPageParams, | ||
| VsQueryParams, | ||
| VsRawResponse, | ||
| } from "./types"; | ||
|
|
||
| const logger = createLogger("connectors:vector-search"); | ||
|
|
||
/**
 * Thin client for the Databricks Vector Search REST endpoints
 * (`.../indexes/{index}/query` and `.../indexes/{index}/query-next-page`).
 *
 * Each call is wrapped in a telemetry span and is bounded by both the
 * configured `timeout` and the caller-supplied `AbortSignal`.
 */
export class VectorSearchConnector {
  // Only `timeout` is kept on the instance; `telemetry` is consumed once in
  // the constructor. `Required<VectorSearchConnectorConfig>` would also demand
  // a `telemetry` property that is never assigned.
  private readonly config: Required<
    Pick<VectorSearchConnectorConfig, "timeout">
  >;
  private readonly telemetry: TelemetryProvider;

  /**
   * @param config Optional connector settings. `timeout` (milliseconds)
   *   defaults to 30_000; `telemetry` is forwarded to the telemetry manager.
   */
  constructor(config: VectorSearchConnectorConfig = {}) {
    this.config = {
      timeout: config.timeout ?? 30_000,
    };
    this.telemetry = TelemetryManager.getProvider(
      "vector-search",
      config.telemetry,
    );
  }

  /**
   * Runs a similarity query against a Vector Search index.
   *
   * @param workspaceClient Client whose `apiClient.request` performs the call.
   * @param params Query description; optional parts (text, vector, non-empty
   *   filters, reranker) are only included in the request body when present.
   * @param signal Optional cancellation signal, honoured both before the
   *   request starts and while it is in flight.
   * @returns The raw response returned by the endpoint.
   * @throws Error when cancelled, when the configured timeout elapses, or
   *   when the underlying request fails (the failure is recorded on the span
   *   and rethrown).
   */
  async query(
    workspaceClient: WorkspaceClient,
    params: VsQueryParams,
    signal?: AbortSignal,
  ): Promise<VsRawResponse> {
    if (signal?.aborted) {
      throw new Error("Query cancelled before execution");
    }

    const hasFilters =
      !!params.filters && Object.keys(params.filters).length > 0;

    const body: Record<string, unknown> = {
      columns: params.columns,
      num_results: params.numResults,
      query_type: params.queryType.toUpperCase(),
      debug_level: 1,
    };

    if (params.queryText) body.query_text = params.queryText;
    if (params.queryVector) body.query_vector = params.queryVector;
    if (hasFilters) body.filters = params.filters;
    if (params.reranker) {
      body.reranker = {
        model: "databricks_reranker",
        parameters: { columns_to_rerank: params.reranker.columnsToRerank },
      };
    }

    logger.debug(
      "Querying VS index %s (type=%s, num_results=%d)",
      params.indexName,
      params.queryType,
      params.numResults,
    );

    return this.telemetry.startActiveSpan(
      "vector-search.query",
      {
        kind: SpanKind.CLIENT,
        attributes: {
          "db.system": "databricks",
          "vs.index_name": params.indexName,
          "vs.query_type": params.queryType,
          "vs.num_results": params.numResults,
          "vs.has_filters": hasFilters,
          "vs.has_reranker": !!params.reranker,
        },
      },
      async (span: Span) => {
        const startTime = Date.now();
        try {
          const response = await this.raceAgainstInterruption(
            async () =>
              (await workspaceClient.apiClient.request({
                method: "POST",
                // Encode the name so characters such as "/" or "?" cannot
                // alter the request path.
                path: `/api/2.0/vector-search/indexes/${encodeURIComponent(params.indexName)}/query`,
                body,
                headers: new Headers({ "Content-Type": "application/json" }),
                raw: false,
                query: {},
              })) as VsRawResponse,
            signal,
          );

          const duration = Date.now() - startTime;
          const serverTime = response.debug_info?.response_time ?? 0;
          span.setAttribute("vs.result_count", response.result.row_count);
          span.setAttribute("vs.query_time_ms", serverTime);
          span.setAttribute("vs.duration_ms", duration);
          span.setStatus({ code: SpanStatusCode.OK });

          logger.event()?.setContext("vector-search", {
            index_name: params.indexName,
            query_type: params.queryType,
            result_count: response.result.row_count,
            query_time_ms: serverTime,
            duration_ms: duration,
          });

          return response;
        } catch (error) {
          span.recordException(error as Error);
          span.setStatus({
            code: SpanStatusCode.ERROR,
            message: error instanceof Error ? error.message : String(error),
          });
          throw error;
        }
      },
      { name: "vector-search", includePrefix: true },
    );
  }

  /**
   * Fetches the next page of a previous query using its page token.
   *
   * @param workspaceClient Client whose `apiClient.request` performs the call.
   * @param params Index name, endpoint name and the page token to continue from.
   * @param signal Optional cancellation signal, honoured both before the
   *   request starts and while it is in flight.
   * @returns The raw response returned by the endpoint.
   * @throws Error when cancelled, when the configured timeout elapses, or
   *   when the underlying request fails (recorded on the span and rethrown).
   */
  async queryNextPage(
    workspaceClient: WorkspaceClient,
    params: VsNextPageParams,
    signal?: AbortSignal,
  ): Promise<VsRawResponse> {
    if (signal?.aborted) {
      throw new Error("Query cancelled before execution");
    }

    logger.debug(
      "Fetching next page for index %s (endpoint=%s)",
      params.indexName,
      params.endpointName,
    );

    return this.telemetry.startActiveSpan(
      "vector-search.queryNextPage",
      {
        kind: SpanKind.CLIENT,
        attributes: {
          "db.system": "databricks",
          "vs.index_name": params.indexName,
          "vs.endpoint_name": params.endpointName,
        },
      },
      async (span: Span) => {
        try {
          const response = await this.raceAgainstInterruption(
            async () =>
              (await workspaceClient.apiClient.request({
                method: "POST",
                path: `/api/2.0/vector-search/indexes/${encodeURIComponent(params.indexName)}/query-next-page`,
                body: {
                  endpoint_name: params.endpointName,
                  page_token: params.pageToken,
                },
                headers: new Headers({ "Content-Type": "application/json" }),
                raw: false,
                query: {},
              })) as VsRawResponse,
            signal,
          );

          span.setAttribute("vs.result_count", response.result.row_count);
          span.setStatus({ code: SpanStatusCode.OK });
          return response;
        } catch (error) {
          span.recordException(error as Error);
          span.setStatus({
            code: SpanStatusCode.ERROR,
            message: error instanceof Error ? error.message : String(error),
          });
          throw error;
        }
      },
      { name: "vector-search", includePrefix: true },
    );
  }

  /**
   * Settles with `operation`'s outcome unless the caller aborts or the
   * configured timeout elapses first, in which case it rejects.
   *
   * Previously the signal was only inspected before the request and the
   * timeout was never read, so a slow request could not be interrupted.
   *
   * NOTE(review): this stops the caller from waiting but does not cancel the
   * underlying HTTP request; transport-level cancellation would need the
   * SDK's own cancellation mechanism — confirm what `apiClient.request`
   * supports.
   */
  private async raceAgainstInterruption<T>(
    operation: () => Promise<T>,
    signal?: AbortSignal,
  ): Promise<T> {
    if (signal?.aborted) {
      throw new Error("Query cancelled before execution");
    }

    const { timeout } = this.config;
    const cleanups: Array<() => void> = [];
    const interruptions: Array<Promise<never>> = [];

    if (signal) {
      interruptions.push(
        new Promise<never>((_resolve, reject) => {
          const onAbort = () =>
            reject(new Error("Query cancelled during execution"));
          signal.addEventListener("abort", onAbort, { once: true });
          cleanups.push(() => signal.removeEventListener("abort", onAbort));
        }),
      );
    }

    // A non-positive or non-finite timeout disables the deadline.
    if (Number.isFinite(timeout) && timeout > 0) {
      // Timer delays above 2^31-1 ms overflow, so clamp to that maximum.
      const delay = Math.min(timeout, 2_147_483_647);
      interruptions.push(
        new Promise<never>((_resolve, reject) => {
          const timer = setTimeout(
            () =>
              reject(
                new Error(`Vector Search request timed out after ${timeout}ms`),
              ),
            delay,
          );
          cleanups.push(() => clearTimeout(timer));
        }),
      );
    }

    try {
      return await Promise.race([operation(), ...interruptions]);
    } finally {
      // Always release the timer and the abort listener so nothing keeps
      // the event loop alive or rejects after the race is decided.
      for (const dispose of cleanups) dispose();
    }
  }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| export * from "./client"; | ||
| export * from "./types"; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| import type { TelemetryOptions } from "shared"; | ||
|
|
||
/** Optional settings accepted by the Vector Search connector. */
export interface VectorSearchConnectorConfig {
  /** Telemetry options handed to the telemetry manager by the connector. */
  telemetry?: TelemetryOptions;
  /** Timeout in milliseconds; the connector falls back to 30_000 when omitted. */
  timeout?: number;
}
|
|
||
/** Input for a single Vector Search index query. */
export interface VsQueryParams {
  /** Index to query; used as a segment of the request path. */
  indexName: string;
  /** Search mode; upper-cased before being sent as `query_type`. */
  queryType: "ann" | "hybrid" | "full_text";
  /** Columns to return, sent as `columns`. */
  columns: string[];
  /** Number of results to request, sent as `num_results`. */
  numResults: number;
  /** Free-text query, sent as `query_text` when non-empty. */
  queryText?: string;
  /** Query embedding, sent as `query_vector` when provided. */
  queryVector?: number[];
  /** Filter map, sent as `filters` only when it has at least one key. */
  filters?: Record<string, string | number | boolean | Array<string | number>>;
  /** When set, requests reranking over the listed columns. */
  reranker?: { columnsToRerank: string[] };
}
|
|
||
/** Input for fetching the next page of a previous query. */
export interface VsNextPageParams {
  /** Page token to continue from, sent as `page_token`. */
  pageToken: string;
  /** Endpoint name, sent as `endpoint_name`. */
  endpointName: string;
  /** Index being paged; used as a segment of the request path. */
  indexName: string;
}
|
|
||
/** Shape the connector assumes for a Vector Search query response. */
export interface VsRawResponse {
  /** Column metadata for the returned rows. */
  manifest: {
    column_count: number;
    columns: { name: string; type?: string }[];
  };
  /** Returned rows; `row_count` is what the connector records on its span. */
  result: {
    row_count: number;
    data_array: unknown[][];
  };
  /** Presumably the token to pass as `pageToken` for the next page — TODO confirm. */
  next_page_token?: string | null;
  /**
   * Optional debug details. The connector reads `response_time`; other keys
   * are passed through untyped.
   */
  debug_info?: {
    response_time?: number;
    ann_time?: number;
    embedding_gen_time?: number;
    latency_ms?: number;
    [key: string]: unknown;
  };
}
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please also remember the docs (docs/docs/plugins) — we need to ensure the plugin is documented. Thank you! |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| import type { PluginExecuteConfig } from "shared"; | ||
|
|
||
// Shared literals for the default execution policy below.
const DEFAULT_TIMEOUT = 30_000;
const DEFAULT_RETRY_ATTEMPTS = 3;
const DEFAULT_RETRY_INITIAL_DELAY = 1_000;

/**
 * Default execute configuration for the Vector Search plugin: caching is
 * disabled, retries are enabled (3 attempts, initial delay 1000), and the
 * timeout is 30_000 (presumably milliseconds — confirm against
 * `PluginExecuteConfig`).
 */
export const vectorSearchDefaults: PluginExecuteConfig = {
  cache: { enabled: false },
  retry: {
    enabled: true,
    initialDelay: DEFAULT_RETRY_INITIAL_DELAY,
    attempts: DEFAULT_RETRY_ATTEMPTS,
  },
  timeout: DEFAULT_TIMEOUT,
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| export * from "./vector-search"; | ||
| export * from "./types"; |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be great to update the |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| { | ||
| "$schema": "https://databricks.github.io/appkit/schemas/plugin-manifest.schema.json", | ||
| "name": "vector-search", | ||
| "displayName": "Vector Search Plugin", | ||
| "description": "Query Databricks Vector Search indexes with built-in hybrid search, reranking, and pagination", | ||
| "resources": { | ||
| "required": [], | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't we have a Vector Search endpoint / index resources here? This part is crucial for the |
||
| "optional": [] | ||
| }, | ||
| "config": { | ||
| "schema": { | ||
| "type": "object", | ||
| "properties": { | ||
| "timeout": { | ||
| "type": "number", | ||
| "default": 30000, | ||
| "description": "Query execution timeout in milliseconds" | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't you intend to use `../defaults.ts` instead of hardcoding the default here?