diff --git a/.gitignore b/.gitignore
index 3b6cc969..4c51d5b1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ coverage
*.tsbuildinfo
.turbo
+
+# AppKit type generator caches
+.databricks
diff --git a/apps/dev-playground/.env.dist b/apps/dev-playground/.env.dist
index 23c3265a..80eda94b 100644
--- a/apps/dev-playground/.env.dist
+++ b/apps/dev-playground/.env.dist
@@ -9,6 +9,7 @@ OTEL_SERVICE_NAME='dev-playground'
DATABRICKS_VOLUME_PLAYGROUND=
DATABRICKS_VOLUME_OTHER=
DATABRICKS_GENIE_SPACE_ID=
+DATABRICKS_SERVING_ENDPOINT=
LAKEBASE_ENDPOINT='' # Run: databricks postgres list-endpoints projects/{project-id}/branches/{branch-id} — use the `name` field from the output
PGHOST=
PGUSER=
diff --git a/apps/dev-playground/client/.gitignore b/apps/dev-playground/client/.gitignore
index a547bf36..267b28f3 100644
--- a/apps/dev-playground/client/.gitignore
+++ b/apps/dev-playground/client/.gitignore
@@ -12,6 +12,9 @@ dist
dist-ssr
*.local
+# Auto-generated types (endpoint-specific, varies per developer)
+src/appKitServingTypes.d.ts
+
# Editor directories and files
.vscode/*
!.vscode/extensions.json
diff --git a/apps/dev-playground/client/src/routeTree.gen.ts b/apps/dev-playground/client/src/routeTree.gen.ts
index c4c38d14..99ac75fc 100644
--- a/apps/dev-playground/client/src/routeTree.gen.ts
+++ b/apps/dev-playground/client/src/routeTree.gen.ts
@@ -12,6 +12,7 @@ import { Route as rootRouteImport } from './routes/__root'
import { Route as TypeSafetyRouteRouteImport } from './routes/type-safety.route'
import { Route as TelemetryRouteRouteImport } from './routes/telemetry.route'
import { Route as SqlHelpersRouteRouteImport } from './routes/sql-helpers.route'
+import { Route as ServingRouteRouteImport } from './routes/serving.route'
import { Route as ReconnectRouteRouteImport } from './routes/reconnect.route'
import { Route as LakebaseRouteRouteImport } from './routes/lakebase.route'
import { Route as GenieRouteRouteImport } from './routes/genie.route'
@@ -37,6 +38,11 @@ const SqlHelpersRouteRoute = SqlHelpersRouteRouteImport.update({
path: '/sql-helpers',
getParentRoute: () => rootRouteImport,
} as any)
+const ServingRouteRoute = ServingRouteRouteImport.update({
+ id: '/serving',
+ path: '/serving',
+ getParentRoute: () => rootRouteImport,
+} as any)
const ReconnectRouteRoute = ReconnectRouteRouteImport.update({
id: '/reconnect',
path: '/reconnect',
@@ -93,6 +99,7 @@ export interface FileRoutesByFullPath {
'/genie': typeof GenieRouteRoute
'/lakebase': typeof LakebaseRouteRoute
'/reconnect': typeof ReconnectRouteRoute
+ '/serving': typeof ServingRouteRoute
'/sql-helpers': typeof SqlHelpersRouteRoute
'/telemetry': typeof TelemetryRouteRoute
'/type-safety': typeof TypeSafetyRouteRoute
@@ -107,6 +114,7 @@ export interface FileRoutesByTo {
'/genie': typeof GenieRouteRoute
'/lakebase': typeof LakebaseRouteRoute
'/reconnect': typeof ReconnectRouteRoute
+ '/serving': typeof ServingRouteRoute
'/sql-helpers': typeof SqlHelpersRouteRoute
'/telemetry': typeof TelemetryRouteRoute
'/type-safety': typeof TypeSafetyRouteRoute
@@ -122,6 +130,7 @@ export interface FileRoutesById {
'/genie': typeof GenieRouteRoute
'/lakebase': typeof LakebaseRouteRoute
'/reconnect': typeof ReconnectRouteRoute
+ '/serving': typeof ServingRouteRoute
'/sql-helpers': typeof SqlHelpersRouteRoute
'/telemetry': typeof TelemetryRouteRoute
'/type-safety': typeof TypeSafetyRouteRoute
@@ -138,6 +147,7 @@ export interface FileRouteTypes {
| '/genie'
| '/lakebase'
| '/reconnect'
+ | '/serving'
| '/sql-helpers'
| '/telemetry'
| '/type-safety'
@@ -152,6 +162,7 @@ export interface FileRouteTypes {
| '/genie'
| '/lakebase'
| '/reconnect'
+ | '/serving'
| '/sql-helpers'
| '/telemetry'
| '/type-safety'
@@ -166,6 +177,7 @@ export interface FileRouteTypes {
| '/genie'
| '/lakebase'
| '/reconnect'
+ | '/serving'
| '/sql-helpers'
| '/telemetry'
| '/type-safety'
@@ -181,6 +193,7 @@ export interface RootRouteChildren {
GenieRouteRoute: typeof GenieRouteRoute
LakebaseRouteRoute: typeof LakebaseRouteRoute
ReconnectRouteRoute: typeof ReconnectRouteRoute
+ ServingRouteRoute: typeof ServingRouteRoute
SqlHelpersRouteRoute: typeof SqlHelpersRouteRoute
TelemetryRouteRoute: typeof TelemetryRouteRoute
TypeSafetyRouteRoute: typeof TypeSafetyRouteRoute
@@ -209,6 +222,13 @@ declare module '@tanstack/react-router' {
preLoaderRoute: typeof SqlHelpersRouteRouteImport
parentRoute: typeof rootRouteImport
}
+ '/serving': {
+ id: '/serving'
+ path: '/serving'
+ fullPath: '/serving'
+ preLoaderRoute: typeof ServingRouteRouteImport
+ parentRoute: typeof rootRouteImport
+ }
'/reconnect': {
id: '/reconnect'
path: '/reconnect'
@@ -285,6 +305,7 @@ const rootRouteChildren: RootRouteChildren = {
GenieRouteRoute: GenieRouteRoute,
LakebaseRouteRoute: LakebaseRouteRoute,
ReconnectRouteRoute: ReconnectRouteRoute,
+ ServingRouteRoute: ServingRouteRoute,
SqlHelpersRouteRoute: SqlHelpersRouteRoute,
TelemetryRouteRoute: TelemetryRouteRoute,
TypeSafetyRouteRoute: TypeSafetyRouteRoute,
diff --git a/apps/dev-playground/client/src/routes/__root.tsx b/apps/dev-playground/client/src/routes/__root.tsx
index 5cf74ce3..35a2282b 100644
--- a/apps/dev-playground/client/src/routes/__root.tsx
+++ b/apps/dev-playground/client/src/routes/__root.tsx
@@ -104,6 +104,14 @@ function RootComponent() {
Files
+
+
+
diff --git a/apps/dev-playground/client/src/routes/index.tsx b/apps/dev-playground/client/src/routes/index.tsx
index e331d93c..934b1467 100644
--- a/apps/dev-playground/client/src/routes/index.tsx
+++ b/apps/dev-playground/client/src/routes/index.tsx
@@ -218,6 +218,24 @@ function IndexRoute() {
+
+
+
+
+ Model Serving
+
+
+ Chat with a Databricks Model Serving endpoint using streaming
+ completions with real-time SSE responses.
+
+
+
+
diff --git a/apps/dev-playground/client/src/routes/serving.route.tsx b/apps/dev-playground/client/src/routes/serving.route.tsx
new file mode 100644
index 00000000..770d42f4
--- /dev/null
+++ b/apps/dev-playground/client/src/routes/serving.route.tsx
@@ -0,0 +1,148 @@
+import { useServingStream } from "@databricks/appkit-ui/react";
+import { createFileRoute } from "@tanstack/react-router";
+import { useEffect, useRef, useState } from "react";
+
+export const Route = createFileRoute("/serving")({
+ component: ServingRoute,
+});
+
+interface Message {
+ id: string;
+ role: "user" | "assistant";
+ content: string;
+}
+
+function extractContent(chunk: unknown): string {
+ return (
+ (chunk as { choices?: { delta?: { content?: string } }[] })?.choices?.[0]
+ ?.delta?.content ?? ""
+ );
+}
+
+function ServingRoute() {
+ const [input, setInput] = useState("");
+  const [messages, setMessages] = useState<Message[]>([]);
+
+ const { stream, chunks, streaming, error, reset } = useServingStream({
+ messages: [],
+ });
+
+ const streamingContent = chunks.map(extractContent).join("");
+
+ // Commit assistant message when streaming transitions from true → false
+ const prevStreamingRef = useRef(false);
+ useEffect(() => {
+ if (prevStreamingRef.current && !streaming && streamingContent) {
+ setMessages((prev) => [
+ ...prev,
+ {
+ id: crypto.randomUUID(),
+ role: "assistant",
+ content: streamingContent,
+ },
+ ]);
+ reset();
+ }
+ prevStreamingRef.current = streaming;
+ }, [streaming, streamingContent, reset]);
+
+ function handleSubmit(e: React.FormEvent) {
+ e.preventDefault();
+ if (!input.trim() || streaming) return;
+
+ const userMessage: Message = {
+ id: crypto.randomUUID(),
+ role: "user",
+ content: input.trim(),
+ };
+
+ const fullMessages = [
+ ...messages.map(({ role, content }) => ({ role, content })),
+ { role: "user" as const, content: userMessage.content },
+ ];
+
+ setMessages((prev) => [...prev, userMessage]);
+ setInput("");
+ reset();
+ stream({ messages: fullMessages });
+ }
+
+ return (
+
+
+
+
+
+ Model Serving
+
+
+ Chat with a Databricks Model Serving endpoint. Set{" "}
+
+ DATABRICKS_SERVING_ENDPOINT
+ {" "}
+ to enable.
+
+
+
+
+ {/* Messages area */}
+
+ {messages.map((msg) => (
+
+ ))}
+
+ {/* Streaming response */}
+ {streaming && (
+
+
+
+ {streamingContent || "..."}
+
+
+
+ )}
+
+ {error && (
+
+ Error: {error}
+
+ )}
+
+
+ {/* Input area */}
+
+
+
+
+
+ );
+}
diff --git a/apps/dev-playground/client/vite.config.ts b/apps/dev-playground/client/vite.config.ts
index f892c62f..5f37880b 100644
--- a/apps/dev-playground/client/vite.config.ts
+++ b/apps/dev-playground/client/vite.config.ts
@@ -1,4 +1,5 @@
import path from "node:path";
+import { appKitServingTypesPlugin } from "@databricks/appkit";
import { tanstackRouter } from "@tanstack/router-plugin/vite";
import react from "@vitejs/plugin-react";
import { defineConfig } from "vite";
@@ -11,6 +12,7 @@ export default defineConfig({
target: "react",
autoCodeSplitting: process.env.NODE_ENV !== "development",
}),
+ appKitServingTypesPlugin(),
],
server: {
hmr: {
diff --git a/apps/dev-playground/server/index.ts b/apps/dev-playground/server/index.ts
index a4b6a2c6..af05b11f 100644
--- a/apps/dev-playground/server/index.ts
+++ b/apps/dev-playground/server/index.ts
@@ -1,5 +1,12 @@
import "reflect-metadata";
-import { analytics, createApp, files, genie, server } from "@databricks/appkit";
+import {
+ analytics,
+ createApp,
+ files,
+ genie,
+ server,
+ serving,
+} from "@databricks/appkit";
import { WorkspaceClient } from "@databricks/sdk-experimental";
import { lakebaseExamples } from "./lakebase-examples-plugin";
import { reconnect } from "./reconnect-plugin";
@@ -26,6 +33,7 @@ createApp({
}),
lakebaseExamples(),
files(),
+ serving(),
],
...(process.env.APPKIT_E2E_TEST && { client: createMockClient() }),
}).then((appkit) => {
diff --git a/docs/docs/api/appkit/Function.appKitServingTypesPlugin.md b/docs/docs/api/appkit/Function.appKitServingTypesPlugin.md
new file mode 100644
index 00000000..bc28660a
--- /dev/null
+++ b/docs/docs/api/appkit/Function.appKitServingTypesPlugin.md
@@ -0,0 +1,24 @@
+# Function: appKitServingTypesPlugin()
+
+```ts
+function appKitServingTypesPlugin(options?: AppKitServingTypesPluginOptions): Plugin$1;
+```
+
+Vite plugin to generate TypeScript types for AppKit serving endpoints.
+Fetches OpenAPI schemas from Databricks and generates a .d.ts with
+ServingEndpointRegistry module augmentation.
+
+Endpoint discovery order:
+1. Explicit `endpoints` option (override)
+2. AST extraction from server file (server/index.ts or server/server.ts)
+3. DATABRICKS_SERVING_ENDPOINT env var (single default endpoint)
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `options?` | `AppKitServingTypesPluginOptions` |
+
+## Returns
+
+`Plugin$1`
diff --git a/docs/docs/api/appkit/Function.extractServingEndpoints.md b/docs/docs/api/appkit/Function.extractServingEndpoints.md
new file mode 100644
index 00000000..24a5b00d
--- /dev/null
+++ b/docs/docs/api/appkit/Function.extractServingEndpoints.md
@@ -0,0 +1,24 @@
+# Function: extractServingEndpoints()
+
+```ts
+function extractServingEndpoints(serverFilePath: string):
+  | Record<string, EndpointConfig>
+ | null;
+```
+
+Extract serving endpoint config from a server file by AST-parsing it.
+Looks for `serving({ endpoints: { alias: { env: "..." }, ... } })` calls
+and extracts the endpoint alias names and their environment variable mappings.
+
+## Parameters
+
+| Parameter | Type | Description |
+| ------ | ------ | ------ |
+| `serverFilePath` | `string` | Absolute path to the server entry file |
+
+## Returns
+
+ \| `Record`\<`string`, [`EndpointConfig`](Interface.EndpointConfig.md)\>
+ \| `null`
+
+Extracted endpoint config, or null if not found or not extractable
diff --git a/docs/docs/api/appkit/Function.findServerFile.md b/docs/docs/api/appkit/Function.findServerFile.md
new file mode 100644
index 00000000..2ed4e268
--- /dev/null
+++ b/docs/docs/api/appkit/Function.findServerFile.md
@@ -0,0 +1,19 @@
+# Function: findServerFile()
+
+```ts
+function findServerFile(basePath: string): string | null;
+```
+
+Find the server entry file by checking candidate paths in order.
+
+## Parameters
+
+| Parameter | Type | Description |
+| ------ | ------ | ------ |
+| `basePath` | `string` | Project root directory to search from |
+
+## Returns
+
+`string` \| `null`
+
+Absolute path to the server file, or null if none found
diff --git a/docs/docs/api/appkit/Interface.EndpointConfig.md b/docs/docs/api/appkit/Interface.EndpointConfig.md
new file mode 100644
index 00000000..6ee94aa3
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.EndpointConfig.md
@@ -0,0 +1,21 @@
+# Interface: EndpointConfig
+
+## Properties
+
+### env
+
+```ts
+env: string;
+```
+
+Environment variable holding the endpoint name.
+
+***
+
+### servedModel?
+
+```ts
+optional servedModel: string;
+```
+
+Target a specific served model (bypasses traffic routing).
diff --git a/docs/docs/api/appkit/Interface.ServingEndpointEntry.md b/docs/docs/api/appkit/Interface.ServingEndpointEntry.md
new file mode 100644
index 00000000..fa054c3f
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.ServingEndpointEntry.md
@@ -0,0 +1,27 @@
+# Interface: ServingEndpointEntry
+
+Shape of a single registry entry.
+
+## Properties
+
+### chunk
+
+```ts
+chunk: unknown;
+```
+
+***
+
+### request
+
+```ts
+request: Record;
+```
+
+***
+
+### response
+
+```ts
+response: unknown;
+```
diff --git a/docs/docs/api/appkit/Interface.ServingEndpointRegistry.md b/docs/docs/api/appkit/Interface.ServingEndpointRegistry.md
new file mode 100644
index 00000000..defe5270
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.ServingEndpointRegistry.md
@@ -0,0 +1,5 @@
+# Interface: ServingEndpointRegistry
+
+Registry interface for serving endpoint type generation.
+Empty by default — augmented by the Vite type generator's `.d.ts` output via module augmentation.
+When populated, provides autocomplete for alias names and typed request/response/chunk per endpoint.
diff --git a/docs/docs/api/appkit/TypeAlias.ServingFactory.md b/docs/docs/api/appkit/TypeAlias.ServingFactory.md
new file mode 100644
index 00000000..9ccafef5
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.ServingFactory.md
@@ -0,0 +1,19 @@
+# Type Alias: ServingFactory
+
+```ts
+type ServingFactory = keyof ServingEndpointRegistry extends never ? (alias?: string) => ServingEndpointMethods : (alias: K) => ServingEndpointMethods;
+```
+
+Factory function returned by `AppKit.serving`.
+
+This is a conditional type that adapts based on whether `ServingEndpointRegistry`
+has been populated via module augmentation (generated by `appKitServingTypesPlugin()`):
+
+- **Registry empty (default):** `(alias?: string) => ServingEndpointMethods` —
+ accepts any alias string with untyped request/response/chunk.
+- **Registry populated:** `(alias: K) => ServingEndpointMethods<...>` —
+ restricts `alias` to known endpoint keys and infers typed request/response/chunk
+ from the registry entry.
+
+Run `appKitServingTypesPlugin()` in your Vite config to generate the registry
+augmentation and enable full type safety.
diff --git a/docs/docs/api/appkit/index.md b/docs/docs/api/appkit/index.md
index b5fb7ce0..faadf237 100644
--- a/docs/docs/api/appkit/index.md
+++ b/docs/docs/api/appkit/index.md
@@ -33,6 +33,7 @@ plugin architecture, and React integration.
| [BasePluginConfig](Interface.BasePluginConfig.md) | Base configuration interface for AppKit plugins |
| [CacheConfig](Interface.CacheConfig.md) | Configuration for the CacheInterceptor. Controls TTL, size limits, storage backend, and probabilistic cleanup. |
| [DatabaseCredential](Interface.DatabaseCredential.md) | Database credentials with OAuth token for Postgres connection |
+| [EndpointConfig](Interface.EndpointConfig.md) | - |
| [GenerateDatabaseCredentialRequest](Interface.GenerateDatabaseCredentialRequest.md) | Request parameters for generating database OAuth credentials |
| [ITelemetry](Interface.ITelemetry.md) | Plugin-facing interface for OpenTelemetry instrumentation. Provides a thin abstraction over OpenTelemetry APIs for plugins. |
| [LakebasePoolConfig](Interface.LakebasePoolConfig.md) | Configuration for creating a Lakebase connection pool |
@@ -42,6 +43,8 @@ plugin architecture, and React integration.
| [ResourceEntry](Interface.ResourceEntry.md) | Internal representation of a resource in the registry. Extends ResourceRequirement with resolution state and plugin ownership. |
| [ResourceFieldEntry](Interface.ResourceFieldEntry.md) | Defines a single field for a resource. Each field has its own environment variable and optional description. Single-value types use one key (e.g. id); multi-value types (database, secret) use multiple (e.g. instance_name, database_name or scope, key). |
| [ResourceRequirement](Interface.ResourceRequirement.md) | Declares a resource requirement for a plugin. Can be defined statically in a manifest or dynamically via getResourceRequirements(). Narrows the generated base: type → ResourceType enum, permission → ResourcePermission union. |
+| [ServingEndpointEntry](Interface.ServingEndpointEntry.md) | Shape of a single registry entry. |
+| [ServingEndpointRegistry](Interface.ServingEndpointRegistry.md) | Registry interface for serving endpoint type generation. Empty by default — augmented by the Vite type generator's `.d.ts` output via module augmentation. When populated, provides autocomplete for alias names and typed request/response/chunk per endpoint. |
| [StreamExecutionSettings](Interface.StreamExecutionSettings.md) | Execution settings for streaming endpoints. Extends PluginExecutionSettings with SSE stream configuration. |
| [TelemetryConfig](Interface.TelemetryConfig.md) | OpenTelemetry configuration for AppKit applications |
| [ValidationResult](Interface.ValidationResult.md) | Result of validating all registered resources against the environment. |
@@ -54,6 +57,7 @@ plugin architecture, and React integration.
| [IAppRouter](TypeAlias.IAppRouter.md) | Express router type for plugin route registration |
| [PluginData](TypeAlias.PluginData.md) | Tuple of plugin class, config, and name. Created by `toPlugin()` and passed to `createApp()`. |
| [ResourcePermission](TypeAlias.ResourcePermission.md) | Union of all possible permission levels across all resource types. |
+| [ServingFactory](TypeAlias.ServingFactory.md) | Factory function returned by `AppKit.serving`. |
| [ToPlugin](TypeAlias.ToPlugin.md) | Factory function type returned by `toPlugin()`. Accepts optional config and returns a PluginData tuple. |
## Variables
@@ -66,9 +70,12 @@ plugin architecture, and React integration.
| Function | Description |
| ------ | ------ |
+| [appKitServingTypesPlugin](Function.appKitServingTypesPlugin.md) | Vite plugin to generate TypeScript types for AppKit serving endpoints. Fetches OpenAPI schemas from Databricks and generates a .d.ts with ServingEndpointRegistry module augmentation. |
| [appKitTypesPlugin](Function.appKitTypesPlugin.md) | Vite plugin to generate types for AppKit queries. Calls generateFromEntryPoint under the hood. |
| [createApp](Function.createApp.md) | Bootstraps AppKit with the provided configuration. |
| [createLakebasePool](Function.createLakebasePool.md) | Create a Lakebase pool with appkit's logger integration. Telemetry automatically uses appkit's OpenTelemetry configuration via global registry. |
+| [extractServingEndpoints](Function.extractServingEndpoints.md) | Extract serving endpoint config from a server file by AST-parsing it. Looks for `serving({ endpoints: { alias: { env: "..." }, ... } })` calls and extracts the endpoint alias names and their environment variable mappings. |
+| [findServerFile](Function.findServerFile.md) | Find the server entry file by checking candidate paths in order. |
| [generateDatabaseCredential](Function.generateDatabaseCredential.md) | Generate OAuth credentials for Postgres database connection using the proper Postgres API. |
| [getExecutionContext](Function.getExecutionContext.md) | Get the current execution context. |
| [getLakebaseOrmConfig](Function.getLakebaseOrmConfig.md) | Get Lakebase connection configuration for ORMs that don't accept pg.Pool directly. |
diff --git a/docs/docs/api/appkit/typedoc-sidebar.ts b/docs/docs/api/appkit/typedoc-sidebar.ts
index 2f17b1d2..1d498d1a 100644
--- a/docs/docs/api/appkit/typedoc-sidebar.ts
+++ b/docs/docs/api/appkit/typedoc-sidebar.ts
@@ -97,6 +97,11 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Interface.DatabaseCredential",
label: "DatabaseCredential"
},
+ {
+ type: "doc",
+ id: "api/appkit/Interface.EndpointConfig",
+ label: "EndpointConfig"
+ },
{
type: "doc",
id: "api/appkit/Interface.GenerateDatabaseCredentialRequest",
@@ -142,6 +147,16 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Interface.ResourceRequirement",
label: "ResourceRequirement"
},
+ {
+ type: "doc",
+ id: "api/appkit/Interface.ServingEndpointEntry",
+ label: "ServingEndpointEntry"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.ServingEndpointRegistry",
+ label: "ServingEndpointRegistry"
+ },
{
type: "doc",
id: "api/appkit/Interface.StreamExecutionSettings",
@@ -183,6 +198,11 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/TypeAlias.ResourcePermission",
label: "ResourcePermission"
},
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.ServingFactory",
+ label: "ServingFactory"
+ },
{
type: "doc",
id: "api/appkit/TypeAlias.ToPlugin",
@@ -205,6 +225,11 @@ const typedocSidebar: SidebarsConfig = {
type: "category",
label: "Functions",
items: [
+ {
+ type: "doc",
+ id: "api/appkit/Function.appKitServingTypesPlugin",
+ label: "appKitServingTypesPlugin"
+ },
{
type: "doc",
id: "api/appkit/Function.appKitTypesPlugin",
@@ -220,6 +245,16 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Function.createLakebasePool",
label: "createLakebasePool"
},
+ {
+ type: "doc",
+ id: "api/appkit/Function.extractServingEndpoints",
+ label: "extractServingEndpoints"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.findServerFile",
+ label: "findServerFile"
+ },
{
type: "doc",
id: "api/appkit/Function.generateDatabaseCredential",
diff --git a/docs/docs/plugins/serving.md b/docs/docs/plugins/serving.md
new file mode 100644
index 00000000..4b2d7a54
--- /dev/null
+++ b/docs/docs/plugins/serving.md
@@ -0,0 +1,213 @@
+---
+sidebar_position: 7
+---
+
+# Serving plugin
+
+Provides an authenticated proxy to [Databricks Model Serving](https://docs.databricks.com/aws/en/machine-learning/model-serving) endpoints, with invoke and streaming support.
+
+**Key features:**
+- Named endpoint aliases for multiple serving endpoints
+- Non-streaming (`invoke`) and SSE streaming (`stream`) invocation
+- Automatic OpenAPI type generation for request/response schemas
+- Request body filtering based on endpoint schema
+- On-behalf-of (OBO) user execution
+
+## Basic usage
+
+```ts
+import { createApp, server, serving } from "@databricks/appkit";
+
+await createApp({
+ plugins: [
+ server(),
+ serving(),
+ ],
+});
+```
+
+With no configuration, the plugin reads `DATABRICKS_SERVING_ENDPOINT` from the environment and registers it under the `default` alias.
+
+## Configuration options
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `endpoints` | `Record<string, EndpointConfig>` | `{ default: { env: "DATABRICKS_SERVING_ENDPOINT" } }` | Map of alias names to endpoint configs |
+| `timeout` | `number` | `120000` | Request timeout in ms |
+
+### Endpoint aliases
+
+Endpoint aliases let you reference multiple serving endpoints by name:
+
+```ts
+serving({
+ endpoints: {
+ llm: { env: "DATABRICKS_SERVING_ENDPOINT" },
+ classifier: { env: "DATABRICKS_SERVING_ENDPOINT_CLASSIFIER" },
+ },
+})
+```
+
+Each alias maps to an environment variable holding the actual endpoint name. If an endpoint serves multiple models, you can use `servedModel` to bypass traffic routing and target a specific model directly:
+
+```ts
+serving({
+ endpoints: {
+ llm: { env: "DATABRICKS_SERVING_ENDPOINT", servedModel: "llama-v2" },
+ },
+})
+```
+
+## Type generation
+
+The `appKitServingTypesPlugin()` Vite plugin generates TypeScript types from your serving endpoints' OpenAPI schemas. Add it to your `vite.config.ts`:
+
+```ts
+import { appKitServingTypesPlugin } from "@databricks/appkit";
+
+export default defineConfig({
+ plugins: [
+ appKitServingTypesPlugin(),
+ ],
+});
+```
+
+The plugin auto-discovers endpoint configuration from your server file (`server/index.ts` or `server/server.ts`) — no manual config passing needed.
+
+Generated types provide:
+- **Alias autocomplete** in both backend (`AppKit.serving("alias")`) and frontend hooks (`useServingStream`, `useServingInvoke`)
+- **Typed request/response/chunk** per endpoint based on OpenAPI schemas
+
+If an endpoint's OpenAPI schema is unavailable (not deployed, env var not set), the plugin generates generic fallback types. The endpoint is still usable — just without typed request/response.
+
+:::note
+Endpoints that don't define a streaming response schema in their OpenAPI spec will have `chunk: unknown`. For these endpoints, use `useServingInvoke` instead of `useServingStream` — the `response` type will still be properly typed.
+:::
+
+## Environment variables
+
+| Variable | Description |
+|----------|-------------|
+| `DATABRICKS_SERVING_ENDPOINT` | Default endpoint name (used when `endpoints` config is omitted) |
+
+When using named endpoints, define a custom environment variable per alias (e.g. `DATABRICKS_SERVING_ENDPOINT_CLASSIFIER`).
+
+## HTTP endpoints
+
+### Named mode (with `endpoints` config)
+
+- `POST /api/serving/:alias/invoke` — Non-streaming invocation
+- `POST /api/serving/:alias/stream` — SSE streaming invocation
+
+### Default mode (no `endpoints` config)
+
+- `POST /api/serving/invoke` — Non-streaming invocation
+- `POST /api/serving/stream` — SSE streaming invocation
+
+### Request format
+
+```
+POST /api/serving/:alias/invoke
+Content-Type: application/json
+
+{
+ "messages": [
+ { "role": "user", "content": "Hello" }
+ ]
+}
+```
+
+## Programmatic access
+
+The plugin exports `invoke` and `stream` methods for server-side use:
+
+```ts
+const AppKit = await createApp({
+ plugins: [
+ server(),
+ serving({
+ endpoints: {
+ llm: { env: "DATABRICKS_SERVING_ENDPOINT" },
+ },
+ }),
+ ],
+});
+
+// Non-streaming
+const result = await AppKit.serving("llm").invoke({
+ messages: [{ role: "user", content: "Hello" }],
+});
+
+// Streaming
+for await (const chunk of AppKit.serving("llm").stream({
+ messages: [{ role: "user", content: "Hello" }],
+})) {
+ console.log(chunk);
+}
+```
+
+## Frontend hooks
+
+The `@databricks/appkit-ui` package provides React hooks for serving endpoints:
+
+### useServingStream
+
+Streaming invocation via SSE:
+
+```tsx
+import { useServingStream } from "@databricks/appkit-ui/react";
+
+function ChatStream() {
+ const { stream, chunks, streaming, error, reset } = useServingStream(
+ { messages: [{ role: "user", content: "Hello" }] },
+ {
+ alias: "llm",
+ onComplete: (finalChunks) => {
+ // Called with all accumulated chunks when the stream finishes
+ console.log("Stream done, got", finalChunks.length, "chunks");
+ },
+ },
+ );
+
+ return (
+ <>
+
+
+ {chunks.map((chunk, i) => {JSON.stringify(chunk)})}
+ {error && {error}
}
+ >
+ );
+}
+```
+
+### useServingInvoke
+
+Non-streaming invocation. `invoke()` returns a promise with the response data (or `null` on error):
+
+```tsx
+import { useServingInvoke } from "@databricks/appkit-ui/react";
+
+function Classify() {
+ const { invoke, data, loading, error } = useServingInvoke(
+ { inputs: ["sample text"] },
+ { alias: "classifier" },
+ );
+
+ async function handleClick() {
+ const result = await invoke();
+ if (result) {
+ console.log("Classification result:", result);
+ }
+ }
+
+ return (
+ <>
+
+ {data && {JSON.stringify(data)}}
+ {error && {error}
}
+ >
+ );
+}
+```
+
+Both hooks accept `autoStart: true` to invoke automatically on mount.
diff --git a/docs/static/appkit-ui/styles.gen.css b/docs/static/appkit-ui/styles.gen.css
index 9a9a38eb..a2192039 100644
--- a/docs/static/appkit-ui/styles.gen.css
+++ b/docs/static/appkit-ui/styles.gen.css
@@ -831,9 +831,6 @@
.max-w-\[calc\(100\%-2rem\)\] {
max-width: calc(100% - 2rem);
}
- .max-w-full {
- max-width: 100%;
- }
.max-w-max {
max-width: max-content;
}
@@ -4514,6 +4511,11 @@
width: calc(var(--spacing) * 5);
}
}
+ .\[\&_\[data-slot\=scroll-area-viewport\]\>div\]\:\!block {
+ & [data-slot=scroll-area-viewport]>div {
+ display: block !important;
+ }
+ }
.\[\&_a\]\:underline {
& a {
text-decoration-line: underline;
@@ -4637,11 +4639,26 @@
color: var(--muted-foreground);
}
}
+ .\[\&_table\]\:block {
+ & table {
+ display: block;
+ }
+ }
+ .\[\&_table\]\:max-w-full {
+ & table {
+ max-width: 100%;
+ }
+ }
.\[\&_table\]\:border-collapse {
& table {
border-collapse: collapse;
}
}
+ .\[\&_table\]\:overflow-x-auto {
+ & table {
+ overflow-x: auto;
+ }
+ }
.\[\&_table\]\:text-xs {
& table {
font-size: var(--text-xs);
@@ -4851,6 +4868,11 @@
width: 100%;
}
}
+ .\[\&\>\*\]\:min-w-0 {
+ &>* {
+ min-width: calc(var(--spacing) * 0);
+ }
+ }
.\[\&\>\*\]\:focus-visible\:relative {
&>* {
&:focus-visible {
diff --git a/packages/appkit-ui/src/react/hooks/__tests__/use-serving-invoke.test.ts b/packages/appkit-ui/src/react/hooks/__tests__/use-serving-invoke.test.ts
new file mode 100644
index 00000000..6d5f159f
--- /dev/null
+++ b/packages/appkit-ui/src/react/hooks/__tests__/use-serving-invoke.test.ts
@@ -0,0 +1,117 @@
+import { act, renderHook, waitFor } from "@testing-library/react";
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+import { useServingInvoke } from "../use-serving-invoke";
+
+describe("useServingInvoke", () => {
+ beforeEach(() => {
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ new Response(JSON.stringify({ choices: [] }), { status: 200 }),
+ );
+ });
+
+ afterEach(() => {
+ vi.restoreAllMocks();
+ });
+
+ test("initial state is idle", () => {
+ const { result } = renderHook(() => useServingInvoke({ messages: [] }));
+
+ expect(result.current.data).toBeNull();
+ expect(result.current.loading).toBe(false);
+ expect(result.current.error).toBeNull();
+ expect(typeof result.current.invoke).toBe("function");
+ });
+
+ test("calls fetch to correct URL on invoke", async () => {
+ const fetchSpy = vi.spyOn(globalThis, "fetch");
+
+ const { result } = renderHook(() =>
+ useServingInvoke({ messages: [{ role: "user", content: "Hello" }] }),
+ );
+
+ act(() => {
+ result.current.invoke();
+ });
+
+ await waitFor(() => {
+ expect(fetchSpy).toHaveBeenCalledWith(
+ "/api/serving/invoke",
+ expect.objectContaining({
+ method: "POST",
+ body: JSON.stringify({
+ messages: [{ role: "user", content: "Hello" }],
+ }),
+ }),
+ );
+ });
+ });
+
+ test("uses alias in URL when provided", async () => {
+ const fetchSpy = vi.spyOn(globalThis, "fetch");
+
+ const { result } = renderHook(() =>
+ useServingInvoke({ messages: [] }, { alias: "llm" }),
+ );
+
+ act(() => {
+ result.current.invoke();
+ });
+
+ await waitFor(() => {
+ expect(fetchSpy).toHaveBeenCalledWith(
+ "/api/serving/llm/invoke",
+ expect.any(Object),
+ );
+ });
+ });
+
+ test("sets data on successful response", async () => {
+ const responseData = {
+ choices: [{ message: { content: "Hi" } }],
+ };
+
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ new Response(JSON.stringify(responseData), { status: 200 }),
+ );
+
+ const { result } = renderHook(() => useServingInvoke({ messages: [] }));
+
+ act(() => {
+ result.current.invoke();
+ });
+
+ await waitFor(() => {
+ expect(result.current.data).toEqual(responseData);
+ expect(result.current.loading).toBe(false);
+ });
+ });
+
+ test("sets error on failed response", async () => {
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ new Response(JSON.stringify({ error: "Not found" }), { status: 404 }),
+ );
+
+ const { result } = renderHook(() => useServingInvoke({ messages: [] }));
+
+ await act(async () => {
+ result.current.invoke();
+ // Wait for the fetch promise chain to resolve
+ await new Promise((r) => setTimeout(r, 10));
+ });
+
+ await waitFor(() => {
+ expect(result.current.error).toBe("Not found");
+ expect(result.current.loading).toBe(false);
+ });
+ });
+
+ test("auto starts when autoStart is true", async () => {
+ const fetchSpy = vi.spyOn(globalThis, "fetch");
+
+ renderHook(() => useServingInvoke({ messages: [] }, { autoStart: true }));
+
+ await waitFor(() => {
+ expect(fetchSpy).toHaveBeenCalled();
+ });
+ });
+});
diff --git a/packages/appkit-ui/src/react/hooks/__tests__/use-serving-stream.test.ts b/packages/appkit-ui/src/react/hooks/__tests__/use-serving-stream.test.ts
new file mode 100644
index 00000000..1ab0bf44
--- /dev/null
+++ b/packages/appkit-ui/src/react/hooks/__tests__/use-serving-stream.test.ts
@@ -0,0 +1,291 @@
+import { act, renderHook, waitFor } from "@testing-library/react";
+import { afterEach, describe, expect, test, vi } from "vitest";
+
+// Mock connectSSE — capture callbacks so we can simulate SSE events
+let capturedCallbacks: {
+ onMessage?: (msg: { data: string }) => void;
+ onError?: (err: Error) => void;
+ signal?: AbortSignal;
+} = {};
+
+let resolveStream: (() => void) | null = null;
+
+const mockConnectSSE = vi.fn().mockImplementation((opts: any) => {
+ capturedCallbacks = {
+ onMessage: opts.onMessage,
+ onError: opts.onError,
+ signal: opts.signal,
+ };
+  return new Promise<void>((resolve) => {
+ resolveStream = resolve;
+ // Also resolve after a tick as fallback for tests that don't manually resolve
+ setTimeout(resolve, 0);
+ });
+});
+
+vi.mock("@/js", () => ({
+ connectSSE: (...args: unknown[]) => mockConnectSSE(...args),
+}));
+
+import { useServingStream } from "../use-serving-stream";
+
+describe("useServingStream", () => {
+ afterEach(() => {
+ capturedCallbacks = {};
+ resolveStream = null;
+ vi.clearAllMocks();
+ });
+
+ test("initial state is idle", () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ expect(result.current.chunks).toEqual([]);
+ expect(result.current.streaming).toBe(false);
+ expect(result.current.error).toBeNull();
+ expect(typeof result.current.stream).toBe("function");
+ expect(typeof result.current.reset).toBe("function");
+ });
+
+ test("calls connectSSE with correct URL on stream", () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ act(() => {
+ result.current.stream();
+ });
+
+ expect(mockConnectSSE).toHaveBeenCalledWith(
+ expect.objectContaining({
+ url: "/api/serving/stream",
+ payload: JSON.stringify({ messages: [] }),
+ }),
+ );
+ });
+
+ test("uses override body when passed to stream()", () => {
+ const { result } = renderHook(() =>
+ useServingStream({ messages: [{ role: "user", content: "old" }] }),
+ );
+
+ const overrideBody = {
+ messages: [{ role: "user" as const, content: "new" }],
+ };
+
+ act(() => {
+ result.current.stream(overrideBody);
+ });
+
+ expect(mockConnectSSE).toHaveBeenCalledWith(
+ expect.objectContaining({
+ payload: JSON.stringify(overrideBody),
+ }),
+ );
+ });
+
+ test("uses alias in URL when provided", () => {
+ const { result } = renderHook(() =>
+ useServingStream({ messages: [] }, { alias: "embedder" }),
+ );
+
+ act(() => {
+ result.current.stream();
+ });
+
+ expect(mockConnectSSE).toHaveBeenCalledWith(
+ expect.objectContaining({
+ url: "/api/serving/embedder/stream",
+ }),
+ );
+ });
+
+ test("sets streaming to true when stream() is called", () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ act(() => {
+ result.current.stream();
+ });
+
+ expect(result.current.streaming).toBe(true);
+ });
+
+ test("accumulates chunks from onMessage", async () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ act(() => {
+ result.current.stream();
+ });
+
+ act(() => {
+ capturedCallbacks.onMessage?.({ data: JSON.stringify({ id: 1 }) });
+ });
+
+ act(() => {
+ capturedCallbacks.onMessage?.({ data: JSON.stringify({ id: 2 }) });
+ });
+
+ expect(result.current.chunks).toEqual([{ id: 1 }, { id: 2 }]);
+ });
+
+ test("accumulates chunks with error field as normal data", async () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ act(() => {
+ result.current.stream();
+ });
+
+ act(() => {
+ capturedCallbacks.onMessage?.({
+ data: JSON.stringify({ error: "Model overloaded" }),
+ });
+ });
+
+ // Chunks with an `error` field are treated as data, not stream errors.
+ // Transport-level errors are delivered via onError callback instead.
+ expect(result.current.chunks).toEqual([{ error: "Model overloaded" }]);
+ expect(result.current.error).toBeNull();
+ expect(result.current.streaming).toBe(true);
+ });
+
+ test("sets error from onError callback", async () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ act(() => {
+ result.current.stream();
+ });
+
+ act(() => {
+ capturedCallbacks.onError?.(new Error("Connection lost"));
+ });
+
+ expect(result.current.error).toBe("Connection lost");
+ expect(result.current.streaming).toBe(false);
+ });
+
+ test("silently skips malformed JSON messages", () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ act(() => {
+ result.current.stream();
+ });
+
+ act(() => {
+ capturedCallbacks.onMessage?.({ data: "not valid json{" });
+ });
+
+ // No chunks added, no error set
+ expect(result.current.chunks).toEqual([]);
+ expect(result.current.error).toBeNull();
+ });
+
+ test("reset() clears state and aborts active stream", () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ act(() => {
+ result.current.stream();
+ });
+
+ act(() => {
+ capturedCallbacks.onMessage?.({ data: JSON.stringify({ id: 1 }) });
+ });
+
+ expect(result.current.chunks).toHaveLength(1);
+ expect(result.current.streaming).toBe(true);
+
+ act(() => {
+ result.current.reset();
+ });
+
+ expect(result.current.chunks).toEqual([]);
+ expect(result.current.streaming).toBe(false);
+ expect(result.current.error).toBeNull();
+ });
+
+ test("autoStart triggers stream on mount", async () => {
+ renderHook(() => useServingStream({ messages: [] }, { autoStart: true }));
+
+ await waitFor(() => {
+ expect(mockConnectSSE).toHaveBeenCalled();
+ });
+ });
+
+ test("passes abort signal to connectSSE", () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ act(() => {
+ result.current.stream();
+ });
+
+ expect(capturedCallbacks.signal).toBeDefined();
+ expect(capturedCallbacks.signal?.aborted).toBe(false);
+ });
+
+ test("aborts stream on unmount", () => {
+ const { result, unmount } = renderHook(() =>
+ useServingStream({ messages: [] }),
+ );
+
+ act(() => {
+ result.current.stream();
+ });
+
+ const signal = capturedCallbacks.signal;
+ expect(signal?.aborted).toBe(false);
+
+ unmount();
+
+ expect(signal?.aborted).toBe(true);
+ });
+
+ test("sets streaming to false when connectSSE resolves", async () => {
+ const { result } = renderHook(() => useServingStream({ messages: [] }));
+
+ act(() => {
+ result.current.stream();
+ });
+
+ await waitFor(() => {
+ expect(result.current.streaming).toBe(false);
+ });
+ });
+
+ test("calls onComplete with accumulated chunks when stream finishes", async () => {
+ const onComplete = vi.fn();
+
+ // Use a controllable mock so stream doesn't auto-resolve
+ mockConnectSSE.mockImplementationOnce((opts: any) => {
+ capturedCallbacks = {
+ onMessage: opts.onMessage,
+ onError: opts.onError,
+ signal: opts.signal,
+ };
+    return new Promise<void>((resolve) => {
+ resolveStream = resolve;
+ });
+ });
+
+ const { result } = renderHook(() =>
+ useServingStream({ messages: [] }, { onComplete }),
+ );
+
+ act(() => {
+ result.current.stream();
+ });
+
+ // Send two chunks
+ act(() => {
+ capturedCallbacks.onMessage?.({ data: JSON.stringify({ id: 1 }) });
+ });
+ act(() => {
+ capturedCallbacks.onMessage?.({ data: JSON.stringify({ id: 2 }) });
+ });
+
+ expect(onComplete).not.toHaveBeenCalled();
+
+ // Complete the stream
+ await act(async () => {
+ resolveStream?.();
+ await new Promise((r) => setTimeout(r, 0));
+ });
+
+ expect(onComplete).toHaveBeenCalledWith([{ id: 1 }, { id: 2 }]);
+ });
+});
diff --git a/packages/appkit-ui/src/react/hooks/index.ts b/packages/appkit-ui/src/react/hooks/index.ts
index 84d51b53..a425b010 100644
--- a/packages/appkit-ui/src/react/hooks/index.ts
+++ b/packages/appkit-ui/src/react/hooks/index.ts
@@ -2,8 +2,13 @@ export type {
AnalyticsFormat,
InferResultByFormat,
InferRowType,
+ InferServingChunk,
+ InferServingRequest,
+ InferServingResponse,
PluginRegistry,
QueryRegistry,
+ ServingAlias,
+ ServingEndpointRegistry,
TypedArrowTable,
UseAnalyticsQueryOptions,
UseAnalyticsQueryResult,
@@ -15,3 +20,13 @@ export {
useChartData,
} from "./use-chart-data";
export { usePluginClientConfig } from "./use-plugin-config";
+export {
+ type UseServingInvokeOptions,
+ type UseServingInvokeResult,
+ useServingInvoke,
+} from "./use-serving-invoke";
+export {
+ type UseServingStreamOptions,
+ type UseServingStreamResult,
+ useServingStream,
+} from "./use-serving-stream";
diff --git a/packages/appkit-ui/src/react/hooks/types.ts b/packages/appkit-ui/src/react/hooks/types.ts
index 5db725fc..19ce1fac 100644
--- a/packages/appkit-ui/src/react/hooks/types.ts
+++ b/packages/appkit-ui/src/react/hooks/types.ts
@@ -134,3 +134,54 @@ export type InferParams = K extends AugmentedRegistry
export interface PluginRegistry {
[key: string]: Record;
}
+
+// ============================================================================
+// Serving Endpoint Registry
+// ============================================================================
+
+/**
+ * Serving endpoint registry for type-safe alias names.
+ * Extend this interface via module augmentation to get alias autocomplete:
+ *
+ * @example
+ * ```typescript
+ * // Auto-generated by appKitServingTypesPlugin()
+ * declare module "@databricks/appkit-ui/react" {
+ * interface ServingEndpointRegistry {
+ * llm: { request: {...}; response: {...}; chunk: {...} };
+ * }
+ * }
+ * ```
+ */
+// biome-ignore lint/suspicious/noEmptyInterface: intentionally empty — populated via module augmentation
+export interface ServingEndpointRegistry {}
+
+/** Resolves to registry keys if populated, otherwise string */
+export type ServingAlias =
+  AugmentedRegistry<ServingEndpointRegistry> extends never
+    ? string
+    : AugmentedRegistry<ServingEndpointRegistry>;
+
+/** Infers chunk type from registry when alias is a known key */
+export type InferServingChunk<K extends ServingAlias> =
+  K extends AugmentedRegistry<ServingEndpointRegistry>
+    ? ServingEndpointRegistry[K] extends { chunk: infer C }
+      ? C
+      : unknown
+    : unknown;
+
+/** Infers response type from registry when alias is a known key */
+export type InferServingResponse<K extends ServingAlias> =
+  K extends AugmentedRegistry<ServingEndpointRegistry>
+    ? ServingEndpointRegistry[K] extends { response: infer R }
+      ? R
+      : unknown
+    : unknown;
+
+/** Infers request type from registry when alias is a known key */
+export type InferServingRequest<K extends ServingAlias> =
+  K extends AugmentedRegistry<ServingEndpointRegistry>
+    ? ServingEndpointRegistry[K] extends { request: infer Req }
+      ? Req
+      : Record<string, unknown>
+    : Record<string, unknown>;
diff --git a/packages/appkit-ui/src/react/hooks/use-serving-invoke.ts b/packages/appkit-ui/src/react/hooks/use-serving-invoke.ts
new file mode 100644
index 00000000..8e80e82e
--- /dev/null
+++ b/packages/appkit-ui/src/react/hooks/use-serving-invoke.ts
@@ -0,0 +1,111 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import type {
+ InferServingRequest,
+ InferServingResponse,
+ ServingAlias,
+} from "./types";
+
+export interface UseServingInvokeOptions<
+ K extends ServingAlias = ServingAlias,
+> {
+ /** Endpoint alias for named mode. Omit for default mode. */
+ alias?: K;
+ /** If false, does not invoke automatically on mount. Default: false */
+ autoStart?: boolean;
+}
+
+export interface UseServingInvokeResult<
+ T = unknown,
+  TBody = Record<string, unknown>,
+> {
+ /** Trigger the invocation. Pass an optional body override for this invocation. */
+  invoke: (overrideBody?: TBody) => Promise<T | null>;
+ /** Response data, null until loaded. */
+ data: T | null;
+ /** Whether a request is in progress. */
+ loading: boolean;
+ /** Error message, if any. */
+ error: string | null;
+}
+
+/**
+ * Hook for non-streaming invocation of a serving endpoint.
+ * Calls `POST /api/serving/invoke` (default) or `POST /api/serving/{alias}/invoke` (named).
+ *
+ * When the type generator has populated `ServingEndpointRegistry`, the response type
+ * is automatically inferred from the endpoint's OpenAPI schema.
+ */
+export function useServingInvoke<K extends ServingAlias = ServingAlias>(
+  body: InferServingRequest<K>,
+  options: UseServingInvokeOptions<K> = {} as UseServingInvokeOptions<K>,
+): UseServingInvokeResult<InferServingResponse<K>, InferServingRequest<K>> {
+  type TResponse = InferServingResponse<K>;
+ const { alias, autoStart = false } = options;
+
+  const [data, setData] = useState<TResponse | null>(null);
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  const abortControllerRef = useRef<AbortController | null>(null);
+
+ const urlSuffix = alias
+ ? `/api/serving/${encodeURIComponent(String(alias))}/invoke`
+ : "/api/serving/invoke";
+
+ const bodyJson = JSON.stringify(body);
+
+ const invoke = useCallback(
+    (overrideBody?: InferServingRequest<K>): Promise<TResponse | null> => {
+ if (abortControllerRef.current) {
+ abortControllerRef.current.abort();
+ }
+
+ setLoading(true);
+ setError(null);
+ setData(null);
+
+ const abortController = new AbortController();
+ abortControllerRef.current = abortController;
+
+ const payload = overrideBody ? JSON.stringify(overrideBody) : bodyJson;
+
+ return fetch(urlSuffix, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: payload,
+ signal: abortController.signal,
+ })
+ .then(async (res) => {
+ if (!res.ok) {
+ const errorBody = await res.json().catch(() => null);
+ throw new Error(errorBody?.error || `HTTP ${res.status}`);
+ }
+ return res.json();
+ })
+ .then((result: TResponse) => {
+ if (abortController.signal.aborted) return null;
+ setData(result);
+ setLoading(false);
+ return result;
+ })
+ .catch((err: Error) => {
+ if (abortController.signal.aborted) return null;
+ setError(err.message || "Request failed");
+ setLoading(false);
+ return null;
+ });
+ },
+ [urlSuffix, bodyJson],
+ );
+
+ useEffect(() => {
+ if (autoStart) {
+ invoke();
+ }
+
+ return () => {
+ abortControllerRef.current?.abort();
+ };
+ }, [invoke, autoStart]);
+
+ return { invoke, data, loading, error };
+}
diff --git a/packages/appkit-ui/src/react/hooks/use-serving-stream.ts b/packages/appkit-ui/src/react/hooks/use-serving-stream.ts
new file mode 100644
index 00000000..f0bb7bf2
--- /dev/null
+++ b/packages/appkit-ui/src/react/hooks/use-serving-stream.ts
@@ -0,0 +1,137 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import { connectSSE } from "@/js";
+import type {
+ InferServingChunk,
+ InferServingRequest,
+ ServingAlias,
+} from "./types";
+
+export interface UseServingStreamOptions<
+ K extends ServingAlias = ServingAlias,
+  T = InferServingChunk<K>,
+> {
+ /** Endpoint alias for named mode. Omit for default mode. */
+ alias?: K;
+ /** If true, starts streaming automatically on mount. Default: false */
+ autoStart?: boolean;
+ /** Called with accumulated chunks when the stream completes successfully. */
+ onComplete?: (chunks: T[]) => void;
+}
+
+export interface UseServingStreamResult<
+ T = unknown,
+  TBody = Record<string, unknown>,
+> {
+ /** Trigger the streaming invocation. Pass an optional body override for this invocation. */
+ stream: (overrideBody?: TBody) => void;
+ /** Accumulated chunks received so far. */
+ chunks: T[];
+ /** Whether streaming is in progress. */
+ streaming: boolean;
+ /** Error message, if any. */
+ error: string | null;
+ /** Reset chunks and abort any active stream. */
+ reset: () => void;
+}
+
+/**
+ * Hook for streaming invocation of a serving endpoint via SSE.
+ * Calls `POST /api/serving/stream` (default) or `POST /api/serving/{alias}/stream` (named).
+ * Accumulates parsed chunks in state.
+ *
+ * When the type generator has populated `ServingEndpointRegistry`, the chunk type
+ * is automatically inferred from the endpoint's OpenAPI schema.
+ */
+export function useServingStream<K extends ServingAlias = ServingAlias>(
+  body: InferServingRequest<K>,
+  options: UseServingStreamOptions<K> = {} as UseServingStreamOptions<K>,
+): UseServingStreamResult<InferServingChunk<K>, InferServingRequest<K>> {
+  type TChunk = InferServingChunk<K>;
+ const { alias, autoStart = false, onComplete } = options;
+
+  const [chunks, setChunks] = useState<TChunk[]>([]);
+  const [streaming, setStreaming] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  const abortControllerRef = useRef<AbortController | null>(null);
+  const chunksRef = useRef<TChunk[]>([]);
+ const onCompleteRef = useRef(onComplete);
+ onCompleteRef.current = onComplete;
+
+ const urlSuffix = alias
+ ? `/api/serving/${encodeURIComponent(String(alias))}/stream`
+ : "/api/serving/stream";
+
+ const reset = useCallback(() => {
+ abortControllerRef.current?.abort();
+ abortControllerRef.current = null;
+ chunksRef.current = [];
+ setChunks([]);
+ setStreaming(false);
+ setError(null);
+ }, []);
+
+ const bodyJson = JSON.stringify(body);
+
+ const stream = useCallback(
+    (overrideBody?: InferServingRequest<K>) => {
+ // Abort any existing stream
+ abortControllerRef.current?.abort();
+
+ setStreaming(true);
+ setError(null);
+ setChunks([]);
+ chunksRef.current = [];
+
+ const abortController = new AbortController();
+ abortControllerRef.current = abortController;
+
+ const payload = overrideBody ? JSON.stringify(overrideBody) : bodyJson;
+
+ connectSSE({
+ url: urlSuffix,
+ payload,
+ signal: abortController.signal,
+ onMessage: async (message) => {
+ if (abortController.signal.aborted) return;
+ try {
+ const parsed = JSON.parse(message.data);
+
+ chunksRef.current = [...chunksRef.current, parsed as TChunk];
+ setChunks(chunksRef.current);
+ } catch {
+ // Skip malformed messages
+ }
+ },
+ onError: (err) => {
+ if (abortController.signal.aborted) return;
+ setStreaming(false);
+ setError(err instanceof Error ? err.message : "Streaming failed");
+ },
+ })
+ .then(() => {
+ if (abortController.signal.aborted) return;
+ // Stream completed
+ setStreaming(false);
+ onCompleteRef.current?.(chunksRef.current);
+ })
+ .catch(() => {
+ if (abortController.signal.aborted) return;
+ setStreaming(false);
+ setError("Connection error");
+ });
+ },
+ [urlSuffix, bodyJson],
+ );
+
+ useEffect(() => {
+ if (autoStart) {
+ stream();
+ }
+
+ return () => {
+ abortControllerRef.current?.abort();
+ };
+ }, [stream, autoStart]);
+
+ return { stream, chunks, streaming, error, reset };
+}
diff --git a/packages/appkit/package.json b/packages/appkit/package.json
index 9e810b97..06da3ee1 100644
--- a/packages/appkit/package.json
+++ b/packages/appkit/package.json
@@ -50,6 +50,7 @@
"typecheck": "tsc --noEmit"
},
"dependencies": {
+ "@ast-grep/napi": "0.37.0",
"@databricks/lakebase": "workspace:*",
"@databricks/sdk-experimental": "0.16.0",
"@opentelemetry/api": "1.9.0",
diff --git a/packages/appkit/src/connectors/serving/client.ts b/packages/appkit/src/connectors/serving/client.ts
new file mode 100644
index 00000000..6254426d
--- /dev/null
+++ b/packages/appkit/src/connectors/serving/client.ts
@@ -0,0 +1,223 @@
+import { ApiError, type WorkspaceClient } from "@databricks/sdk-experimental";
+import { createLogger } from "../../logging/logger";
+import type { ServingInvokeOptions } from "./types";
+
+const logger = createLogger("connectors:serving");
+
+/**
+ * Builds the invocation URL for a serving endpoint.
+ * Uses `/served-models/{model}/invocations` when servedModel is specified,
+ * otherwise `/serving-endpoints/{name}/invocations`.
+ */
+function buildInvocationUrl(
+ host: string,
+ endpointName: string,
+ servedModel?: string,
+): string {
+ const base = host.startsWith("http") ? host : `https://${host}`;
+ const encodedName = encodeURIComponent(endpointName);
+ const path = servedModel
+ ? `/serving-endpoints/${encodedName}/served-models/${encodeURIComponent(servedModel)}/invocations`
+ : `/serving-endpoints/${encodedName}/invocations`;
+ return new URL(path, base).toString();
+}
+
+/**
+ * Maps upstream Databricks error status codes to appropriate proxy responses.
+ */
+function mapUpstreamError(
+ status: number,
+ body: string,
+ headers: Headers,
+): ApiError {
+ const safeMessage = body.length > 500 ? `${body.slice(0, 500)}...` : body;
+
+ let parsed: { message?: string; error?: string } = {};
+ try {
+ parsed = JSON.parse(body);
+ } catch {
+ // body is not JSON
+ }
+
+ const message = parsed.message || parsed.error || safeMessage;
+
+ switch (true) {
+ case status === 400:
+ return new ApiError(message, "BAD_REQUEST", 400, undefined, []);
+ case status === 401 || status === 403:
+ logger.warn("Authentication failure from serving endpoint: %s", message);
+ return new ApiError(message, "AUTH_FAILURE", status, undefined, []);
+ case status === 404:
+ return new ApiError(message, "NOT_FOUND", 404, undefined, []);
+ case status === 429: {
+ const retryAfter = headers.get("retry-after");
+ const retryMessage = retryAfter
+ ? `${message} (retry-after: ${retryAfter})`
+ : message;
+ return new ApiError(retryMessage, "RATE_LIMITED", 429, undefined, []);
+ }
+ case status === 503:
+ return new ApiError(
+ "Endpoint loading, retry shortly",
+ "SERVICE_UNAVAILABLE",
+ 503,
+ undefined,
+ [],
+ );
+ case status >= 500:
+ return new ApiError(message, "BAD_GATEWAY", 502, undefined, []);
+ default:
+ return new ApiError(message, "UNKNOWN", status, undefined, []);
+ }
+}
+
+/**
+ * Invokes a serving endpoint and returns the parsed JSON response.
+ */
+export async function invoke(
+ client: WorkspaceClient,
+ endpointName: string,
+  body: Record<string, unknown>,
+ options?: ServingInvokeOptions,
+): Promise<unknown> {
+ const host = client.config.host;
+ if (!host) {
+ throw new Error(
+ "Databricks host is not configured. Set DATABRICKS_HOST or configure client.config.host.",
+ );
+ }
+
+ const url = buildInvocationUrl(host, endpointName, options?.servedModel);
+
+ // Always strip `stream` from the body — the connector controls this
+ const { stream: _stream, ...cleanBody } = body;
+
+ const headers = new Headers({
+ "Content-Type": "application/json",
+ Accept: "application/json",
+ });
+ await client.config.authenticate(headers);
+
+ logger.debug("Invoking endpoint %s at %s", endpointName, url);
+
+ const res = await fetch(url, {
+ method: "POST",
+ headers,
+ body: JSON.stringify(cleanBody),
+ signal: options?.signal,
+ });
+
+ if (!res.ok) {
+ const text = await res.text();
+ throw mapUpstreamError(res.status, text, res.headers);
+ }
+
+ return res.json();
+}
+
+/**
+ * Invokes a serving endpoint with streaming enabled.
+ * Yields parsed JSON chunks from the NDJSON SSE response.
+ */
+export async function* stream(
+ client: WorkspaceClient,
+ endpointName: string,
+  body: Record<string, unknown>,
+ options?: ServingInvokeOptions,
+): AsyncGenerator<unknown> {
+ const host = client.config.host;
+ if (!host) {
+ throw new Error(
+ "Databricks host is not configured. Set DATABRICKS_HOST or configure client.config.host.",
+ );
+ }
+
+ const url = buildInvocationUrl(host, endpointName, options?.servedModel);
+
+ // Strip any user-provided `stream` and inject `stream: true`
+ const { stream: _stream, ...cleanBody } = body;
+ const streamBody = { ...cleanBody, stream: true };
+
+ const headers = new Headers({
+ "Content-Type": "application/json",
+ Accept: "text/event-stream",
+ });
+ await client.config.authenticate(headers);
+
+ logger.debug("Streaming from endpoint %s at %s", endpointName, url);
+
+ const res = await fetch(url, {
+ method: "POST",
+ headers,
+ body: JSON.stringify(streamBody),
+ signal: options?.signal,
+ });
+
+ if (!res.ok) {
+ const text = await res.text();
+ throw mapUpstreamError(res.status, text, res.headers);
+ }
+
+ if (!res.body) {
+ throw new Error("Response body is null — streaming not supported");
+ }
+
+ const reader = res.body.getReader();
+ const decoder = new TextDecoder();
+ let buffer = "";
+ const MAX_BUFFER_SIZE = 1024 * 1024; // 1 MB
+
+ try {
+ while (true) {
+ if (options?.signal?.aborted) break;
+
+ const { done, value } = await reader.read();
+ if (done) break;
+
+ buffer += decoder.decode(value, { stream: true });
+
+ if (buffer.length > MAX_BUFFER_SIZE) {
+ logger.warn(
+ "Stream buffer exceeded %d bytes, discarding incomplete data",
+ MAX_BUFFER_SIZE,
+ );
+ buffer = "";
+ }
+
+ // Process complete lines from the buffer
+ const lines = buffer.split("\n");
+ // Keep the last (potentially incomplete) line in the buffer
+ buffer = lines.pop() ?? "";
+
+ for (const line of lines) {
+ const trimmed = line.trim();
+ if (!trimmed || trimmed.startsWith(":")) continue; // skip empty lines and SSE comments
+ if (trimmed === "data: [DONE]") return;
+
+ if (trimmed.startsWith("data: ")) {
+ const jsonStr = trimmed.slice(6);
+ try {
+ yield JSON.parse(jsonStr);
+ } catch {
+ logger.warn("Failed to parse streaming chunk: %s", jsonStr);
+ }
+ }
+ }
+ }
+
+ // Process any remaining data in the buffer
+ if (buffer.trim() && !options?.signal?.aborted) {
+ const trimmed = buffer.trim();
+ if (trimmed.startsWith("data: ") && trimmed !== "data: [DONE]") {
+ try {
+ yield JSON.parse(trimmed.slice(6));
+ } catch {
+ logger.warn("Failed to parse final streaming chunk: %s", trimmed);
+ }
+ }
+ }
+ } finally {
+ reader.cancel().catch(() => {});
+ reader.releaseLock();
+ }
+}
diff --git a/packages/appkit/src/connectors/serving/tests/client.test.ts b/packages/appkit/src/connectors/serving/tests/client.test.ts
new file mode 100644
index 00000000..6af859ae
--- /dev/null
+++ b/packages/appkit/src/connectors/serving/tests/client.test.ts
@@ -0,0 +1,303 @@
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+import { invoke, stream } from "../client";
+
+const mockAuthenticate = vi.fn();
+
+function createMockClient(host = "https://test.databricks.com") {
+ return {
+ config: {
+ host,
+ authenticate: mockAuthenticate,
+ },
+ } as any;
+}
+
+describe("Serving Connector", () => {
+ beforeEach(() => {
+ mockAuthenticate.mockResolvedValue(undefined);
+ });
+
+ afterEach(() => {
+ vi.restoreAllMocks();
+ });
+
+ describe("invoke", () => {
+ test("constructs correct URL for endpoint invocation", async () => {
+ const fetchSpy = vi
+ .spyOn(globalThis, "fetch")
+ .mockResolvedValue(
+ new Response(JSON.stringify({ result: "ok" }), { status: 200 }),
+ );
+
+ const client = createMockClient();
+ await invoke(client, "my-endpoint", { messages: [] });
+
+ expect(fetchSpy).toHaveBeenCalledWith(
+ "https://test.databricks.com/serving-endpoints/my-endpoint/invocations",
+ expect.objectContaining({ method: "POST" }),
+ );
+ });
+
+ test("constructs correct URL with servedModel override", async () => {
+ const fetchSpy = vi
+ .spyOn(globalThis, "fetch")
+ .mockResolvedValue(
+ new Response(JSON.stringify({ result: "ok" }), { status: 200 }),
+ );
+
+ const client = createMockClient();
+ await invoke(
+ client,
+ "my-endpoint",
+ { messages: [] },
+ { servedModel: "llama-v2" },
+ );
+
+ expect(fetchSpy).toHaveBeenCalledWith(
+ "https://test.databricks.com/serving-endpoints/my-endpoint/served-models/llama-v2/invocations",
+ expect.objectContaining({ method: "POST" }),
+ );
+ });
+
+ test("authenticates request headers", async () => {
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ new Response(JSON.stringify({ result: "ok" }), { status: 200 }),
+ );
+
+ const client = createMockClient();
+ await invoke(client, "my-endpoint", { messages: [] });
+
+ expect(mockAuthenticate).toHaveBeenCalledWith(expect.any(Headers));
+ });
+
+ test("strips stream property from body", async () => {
+ const fetchSpy = vi
+ .spyOn(globalThis, "fetch")
+ .mockResolvedValue(
+ new Response(JSON.stringify({ result: "ok" }), { status: 200 }),
+ );
+
+ const client = createMockClient();
+ await invoke(client, "my-endpoint", {
+ messages: [],
+ stream: true,
+ temperature: 0.7,
+ });
+
+ const body = JSON.parse(fetchSpy.mock.calls[0][1]?.body as string);
+ expect(body).toEqual({ messages: [], temperature: 0.7 });
+ expect(body.stream).toBeUndefined();
+ });
+
+ test("returns parsed JSON response", async () => {
+ const responseData = { choices: [{ message: { content: "Hello" } }] };
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ new Response(JSON.stringify(responseData), { status: 200 }),
+ );
+
+ const client = createMockClient();
+ const result = await invoke(client, "my-endpoint", { messages: [] });
+
+ expect(result).toEqual(responseData);
+ });
+
+ test("throws ApiError on 400 response", async () => {
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ new Response(JSON.stringify({ message: "Invalid params" }), {
+ status: 400,
+ }),
+ );
+
+ const client = createMockClient();
+ await expect(
+ invoke(client, "my-endpoint", { messages: [] }),
+ ).rejects.toThrow("Invalid params");
+ });
+
+ test("throws ApiError on 404 response", async () => {
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ new Response(JSON.stringify({ message: "Endpoint not found" }), {
+ status: 404,
+ }),
+ );
+
+ const client = createMockClient();
+ await expect(
+ invoke(client, "my-endpoint", { messages: [] }),
+ ).rejects.toThrow("Endpoint not found");
+ });
+
+ test("maps 5xx to 502 bad gateway", async () => {
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ new Response(JSON.stringify({ message: "Internal error" }), {
+ status: 500,
+ }),
+ );
+
+ const client = createMockClient();
+ try {
+ await invoke(client, "my-endpoint", { messages: [] });
+ expect.unreachable("Should have thrown");
+ } catch (err: any) {
+ expect(err.statusCode).toBe(502);
+ }
+ });
+
+ test("forwards AbortSignal", async () => {
+ const controller = new AbortController();
+ const fetchSpy = vi
+ .spyOn(globalThis, "fetch")
+ .mockResolvedValue(
+ new Response(JSON.stringify({ result: "ok" }), { status: 200 }),
+ );
+
+ const client = createMockClient();
+ await invoke(
+ client,
+ "my-endpoint",
+ { messages: [] },
+ { signal: controller.signal },
+ );
+
+ expect(fetchSpy.mock.calls[0][1]?.signal).toBe(controller.signal);
+ });
+
+ test("throws when host is not configured", async () => {
+ const client = {
+ config: {
+ host: "",
+ authenticate: mockAuthenticate,
+ },
+ } as any;
+ await expect(
+ invoke(client, "my-endpoint", { messages: [] }),
+ ).rejects.toThrow("Databricks host is not configured");
+ });
+
+ test("prepends https:// to host without protocol", async () => {
+ const fetchSpy = vi
+ .spyOn(globalThis, "fetch")
+ .mockResolvedValue(
+ new Response(JSON.stringify({ result: "ok" }), { status: 200 }),
+ );
+
+ const client = createMockClient("test.databricks.com");
+ await invoke(client, "my-endpoint", { messages: [] });
+
+ expect(fetchSpy.mock.calls[0][0]).toContain(
+ "https://test.databricks.com",
+ );
+ });
+ });
+
+ describe("stream", () => {
+ function createSSEResponse(chunks: string[]) {
+ const body = `${chunks.join("\n")}\n`;
+ return new Response(body, {
+ status: 200,
+ headers: { "Content-Type": "text/event-stream" },
+ });
+ }
+
+ test("yields parsed NDJSON chunks", async () => {
+ const chunks = [
+ 'data: {"choices":[{"delta":{"content":"Hello"}}]}',
+ 'data: {"choices":[{"delta":{"content":" world"}}]}',
+ "data: [DONE]",
+ ];
+
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ createSSEResponse(chunks),
+ );
+
+ const client = createMockClient();
+ const results: unknown[] = [];
+ for await (const chunk of stream(client, "my-endpoint", {
+ messages: [],
+ })) {
+ results.push(chunk);
+ }
+
+ expect(results).toEqual([
+ { choices: [{ delta: { content: "Hello" } }] },
+ { choices: [{ delta: { content: " world" } }] },
+ ]);
+ });
+
+ test("injects stream: true into body", async () => {
+ const fetchSpy = vi
+ .spyOn(globalThis, "fetch")
+ .mockResolvedValue(createSSEResponse(["data: [DONE]"]));
+
+ const client = createMockClient();
+ // Consume the generator
+ for await (const _ of stream(client, "my-endpoint", { messages: [] })) {
+ // noop
+ }
+
+ const body = JSON.parse(fetchSpy.mock.calls[0][1]?.body as string);
+ expect(body.stream).toBe(true);
+ });
+
+ test("strips user-provided stream and re-injects", async () => {
+ const fetchSpy = vi
+ .spyOn(globalThis, "fetch")
+ .mockResolvedValue(createSSEResponse(["data: [DONE]"]));
+
+ const client = createMockClient();
+ for await (const _ of stream(client, "my-endpoint", {
+ messages: [],
+ stream: false,
+ })) {
+ // noop
+ }
+
+ const body = JSON.parse(fetchSpy.mock.calls[0][1]?.body as string);
+ expect(body.stream).toBe(true);
+ });
+
+ test("skips SSE comments and empty lines", async () => {
+ const chunks = [
+ ": this is a comment",
+ "",
+ 'data: {"choices":[{"delta":{"content":"Hi"}}]}',
+ "",
+ "data: [DONE]",
+ ];
+
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ createSSEResponse(chunks),
+ );
+
+ const client = createMockClient();
+ const results: unknown[] = [];
+ for await (const chunk of stream(client, "my-endpoint", {
+ messages: [],
+ })) {
+ results.push(chunk);
+ }
+
+ expect(results).toHaveLength(1);
+ expect(results[0]).toEqual({ choices: [{ delta: { content: "Hi" } }] });
+ });
+
+ test("throws on non-OK response", async () => {
+ vi.spyOn(globalThis, "fetch").mockResolvedValue(
+ new Response(JSON.stringify({ message: "Rate limited" }), {
+ status: 429,
+ headers: { "Retry-After": "5" },
+ }),
+ );
+
+ const client = createMockClient();
+ try {
+ for await (const _ of stream(client, "my-endpoint", { messages: [] })) {
+ // noop
+ }
+ expect.unreachable("Should have thrown");
+ } catch (err: any) {
+ expect(err.statusCode).toBe(429);
+ }
+ });
+ });
+});
diff --git a/packages/appkit/src/connectors/serving/types.ts b/packages/appkit/src/connectors/serving/types.ts
new file mode 100644
index 00000000..6dd1acba
--- /dev/null
+++ b/packages/appkit/src/connectors/serving/types.ts
@@ -0,0 +1,4 @@
+export interface ServingInvokeOptions {
+ servedModel?: string;
+ signal?: AbortSignal;
+}
diff --git a/packages/appkit/src/index.ts b/packages/appkit/src/index.ts
index 8db7f1d7..3df5572b 100644
--- a/packages/appkit/src/index.ts
+++ b/packages/appkit/src/index.ts
@@ -48,7 +48,13 @@ export {
} from "./errors";
// Plugin authoring
export { Plugin, type ToPlugin, toPlugin } from "./plugin";
-export { analytics, files, genie, lakebase, server } from "./plugins";
+export { analytics, files, genie, lakebase, server, serving } from "./plugins";
+export type {
+ EndpointConfig,
+ ServingEndpointEntry,
+ ServingEndpointRegistry,
+ ServingFactory,
+} from "./plugins/serving/types";
// Registry types and utilities for plugin manifests
export type {
ConfigSchema,
@@ -75,6 +81,10 @@ export {
SpanStatusCode,
type TelemetryConfig,
} from "./telemetry";
-
+export {
+ extractServingEndpoints,
+ findServerFile,
+} from "./type-generator/serving/server-file-extractor";
+export { appKitServingTypesPlugin } from "./type-generator/serving/vite-plugin";
// Vite plugin and type generation
export { appKitTypesPlugin } from "./type-generator/vite-plugin";
diff --git a/packages/appkit/src/plugins/index.ts b/packages/appkit/src/plugins/index.ts
index 7caa040f..4d58082f 100644
--- a/packages/appkit/src/plugins/index.ts
+++ b/packages/appkit/src/plugins/index.ts
@@ -3,3 +3,4 @@ export * from "./files";
export * from "./genie";
export * from "./lakebase";
export * from "./server";
+export * from "./serving";
diff --git a/packages/appkit/src/plugins/serving/defaults.ts b/packages/appkit/src/plugins/serving/defaults.ts
new file mode 100644
index 00000000..1fea64c2
--- /dev/null
+++ b/packages/appkit/src/plugins/serving/defaults.ts
@@ -0,0 +1,26 @@
+import type { StreamExecutionSettings } from "shared";
+
+export const servingInvokeDefaults = {
+ cache: {
+ enabled: false,
+ },
+ retry: {
+ enabled: false,
+ },
+ timeout: 120_000,
+};
+
+export const servingStreamDefaults: StreamExecutionSettings = {
+ default: {
+ cache: {
+ enabled: false,
+ },
+ retry: {
+ enabled: false,
+ },
+ timeout: 120_000,
+ },
+ stream: {
+ bufferSize: 200,
+ },
+};
diff --git a/packages/appkit/src/plugins/serving/index.ts b/packages/appkit/src/plugins/serving/index.ts
new file mode 100644
index 00000000..85caf33b
--- /dev/null
+++ b/packages/appkit/src/plugins/serving/index.ts
@@ -0,0 +1,2 @@
+export * from "./serving";
+export * from "./types";
diff --git a/packages/appkit/src/plugins/serving/manifest.json b/packages/appkit/src/plugins/serving/manifest.json
new file mode 100644
index 00000000..9ac0845f
--- /dev/null
+++ b/packages/appkit/src/plugins/serving/manifest.json
@@ -0,0 +1,54 @@
+{
+ "$schema": "https://databricks.github.io/appkit/schemas/plugin-manifest.schema.json",
+ "name": "serving",
+ "displayName": "Model Serving Plugin",
+ "description": "Authenticated proxy to Databricks Model Serving endpoints",
+ "resources": {
+ "required": [
+ {
+ "type": "serving_endpoint",
+ "alias": "Serving Endpoint",
+ "resourceKey": "serving-endpoint",
+ "description": "Model Serving endpoint for inference",
+ "permission": "CAN_QUERY",
+ "fields": {
+ "name": {
+ "env": "DATABRICKS_SERVING_ENDPOINT",
+ "description": "Serving endpoint name"
+ }
+ }
+ }
+ ],
+ "optional": []
+ },
+ "config": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "endpoints": {
+ "type": "object",
+ "description": "Map of alias names to endpoint configurations",
+ "additionalProperties": {
+ "type": "object",
+ "properties": {
+ "env": {
+ "type": "string",
+ "description": "Environment variable holding the endpoint name"
+ },
+ "servedModel": {
+ "type": "string",
+ "description": "Target a specific served model (bypasses traffic routing)"
+ }
+ },
+ "required": ["env"]
+ }
+ },
+ "timeout": {
+ "type": "number",
+ "default": 120000,
+ "description": "Request timeout in ms. Default: 120000 (2 min)"
+ }
+ }
+ }
+ }
+}
diff --git a/packages/appkit/src/plugins/serving/schema-filter.ts b/packages/appkit/src/plugins/serving/schema-filter.ts
new file mode 100644
index 00000000..92a25c69
--- /dev/null
+++ b/packages/appkit/src/plugins/serving/schema-filter.ts
@@ -0,0 +1,95 @@
+import fs from "node:fs/promises";
+import { createLogger } from "../../logging/logger";
+import {
+ CACHE_VERSION,
+ type ServingCache,
+} from "../../type-generator/serving/cache";
+
+const logger = createLogger("serving:schema-filter");
+
+function isValidCache(data: unknown): data is ServingCache {
+ return (
+ typeof data === "object" &&
+ data !== null &&
+ "version" in data &&
+ (data as ServingCache).version === CACHE_VERSION &&
+ "endpoints" in data &&
+ typeof (data as ServingCache).endpoints === "object"
+ );
+}
+
+/**
+ * Loads endpoint schemas from the type generation cache file.
+ * Returns a map of alias → allowed parameter keys.
+ */
+export async function loadEndpointSchemas(
+ cacheFile: string,
+): Promise