diff --git a/packages/pipelines/package.json b/packages/pipelines/package.json
new file mode 100644
index 000000000..cdc152275
--- /dev/null
+++ b/packages/pipelines/package.json
@@ -0,0 +1,59 @@
+{
+  "name": "@ucdjs/pipelines",
+  "version": "1.0.0",
+  "type": "module",
+  "author": {
+    "name": "Lucas Norgaard",
+    "email": "lucasnrgaard@gmail.com",
+    "url": "https://luxass.dev"
+  },
+  "packageManager": "pnpm@10.27.0",
+  "license": "MIT",
+  "homepage": "https://github.com/ucdjs/ucd",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/ucdjs/ucd.git",
+    "directory": "packages/pipelines"
+  },
+  "bugs": {
+    "url": "https://github.com/ucdjs/ucd/issues"
+  },
+  "exports": {
+    ".": "./dist/index.mjs",
+    "./package.json": "./package.json"
+  },
+  "main": "./dist/index.mjs",
+  "module": "./dist/index.mjs",
+  "types": "./dist/index.d.mts",
+  "files": [
+    "dist"
+  ],
+  "engines": {
+    "node": ">=22.18"
+  },
+  "scripts": {
+    "build": "tsdown --tsconfig=./tsconfig.build.json",
+    "dev": "tsdown --watch",
+    "clean": "git clean -xdf dist node_modules",
+    "lint": "eslint .",
+    "typecheck": "tsc --noEmit -p tsconfig.build.json"
+  },
+  "dependencies": {
+    "@ucdjs-internal/shared": "workspace:*",
+    "picomatch": "catalog:prod"
+  },
+  "devDependencies": {
+    "@luxass/eslint-config": "catalog:linting",
+    "@types/picomatch": "catalog:types",
+    "@ucdjs-tooling/tsconfig": "workspace:*",
+    "@ucdjs-tooling/tsdown-config": "workspace:*",
+    "eslint": "catalog:linting",
+    "publint": "catalog:dev",
+    "tsdown": "catalog:dev",
+    "tsx": "catalog:dev",
+    "typescript": "catalog:dev"
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}
diff --git a/packages/pipelines/src/artifact.ts b/packages/pipelines/src/artifact.ts
new file mode 100644
index 000000000..eb558d8db
--- /dev/null
+++ b/packages/pipelines/src/artifact.ts
@@ -0,0 +1,31 @@
+import type { ParseContext, ParsedRow, PipelineFilter } from "./types";
+
+export interface ArtifactBuildContext {
+  version: string;
+}
+
+export interface PipelineArtifactDefinition<
+  TId extends string = string,
+  TValue = unknown,
+> {
+  id: TId;
+  filter?: PipelineFilter;
+  parser?: (ctx: ParseContext) => AsyncIterable<ParsedRow>;
+  build: (ctx: ArtifactBuildContext, rows?: AsyncIterable<ParsedRow>) => Promise<TValue>;
+}
+
+export function definePipelineArtifact<
+  const TId extends string,
+  TValue,
+>(
+  definition: PipelineArtifactDefinition<TId, TValue>,
+): PipelineArtifactDefinition<TId, TValue> {
+  return definition;
+}
+
+export type InferArtifactId<T> = T extends PipelineArtifactDefinition<infer TId, unknown> ? TId : never;
+export type InferArtifactValue<T> = T extends PipelineArtifactDefinition<string, infer TValue> ? TValue : never;
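+
+// Usage sketch (illustrative only; the file name and line format below are
+// assumptions, not part of this package): an artifact pre-computes a lookup
+// that resolvers can later read via `ctx.getArtifact("names")`.
+//
+//   const namesArtifact = definePipelineArtifact({
+//     id: "names",
+//     filter: (ctx) => ctx.file.name === "DerivedName.txt",
+//     parser: async function* (ctx) {
+//       for await (const line of ctx.readLines()) {
+//         if (ctx.isComment(line)) continue;
+//         const [cp, name] = line.split(";").map((s) => s.trim());
+//         yield { sourceFile: ctx.file.path, kind: "point", codePoint: cp, value: name };
+//       }
+//     },
+//     build: async (_ctx, rows) => {
+//       const names = new Map<string, string>();
+//       if (rows) {
+//         for await (const row of rows) {
+//           if (row.codePoint) names.set(row.codePoint, String(row.value));
+//         }
+//       }
+//       return names; // InferArtifactValue resolves to Map<string, string>
+//     },
+//   });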
+
+export type InferArtifactsMap<T extends readonly PipelineArtifactDefinition[]> = {
+  [K in T[number] as InferArtifactId<K>]: InferArtifactValue<K>;
+};
diff --git a/packages/pipelines/src/events.ts b/packages/pipelines/src/events.ts
new file mode 100644
index 000000000..4d7cbc813
--- /dev/null
+++ b/packages/pipelines/src/events.ts
@@ -0,0 +1,165 @@
+import type { FileContext } from "./types";
+
+export type PipelineEventType =
+  | "pipeline:start"
+  | "pipeline:end"
+  | "version:start"
+  | "version:end"
+  | "artifact:start"
+  | "artifact:end"
+  | "file:matched"
+  | "file:skipped"
+  | "file:fallback"
+  | "parse:start"
+  | "parse:end"
+  | "resolve:start"
+  | "resolve:end"
+  | "error";
+
+export type PipelineStartEvent = {
+  type: "pipeline:start";
+  versions: string[];
+  timestamp: number;
+};
+
+export type PipelineEndEvent = {
+  type: "pipeline:end";
+  durationMs: number;
+  timestamp: number;
+};
+
+export type VersionStartEvent = {
+  type: "version:start";
+  version: string;
+  timestamp: number;
+};
+
+export type VersionEndEvent = {
+  type: "version:end";
+  version: string;
+  durationMs: number;
+  timestamp: number;
+};
+
+export type ArtifactStartEvent = {
+  type: "artifact:start";
+  artifactId: string;
+  version: string;
+  timestamp: number;
+};
+
+export type ArtifactEndEvent = {
+  type: "artifact:end";
+  artifactId: string;
+  version: string;
+  durationMs: number;
+  timestamp: number;
+};
+
+export type FileMatchedEvent = {
+  type: "file:matched";
+  file: FileContext;
+  routeId: string;
+  timestamp: number;
+};
+
+export type FileSkippedEvent = {
+  type: "file:skipped";
+  file: FileContext;
+  reason: "no-match" | "filtered";
+  timestamp: number;
+};
+
+export type FileFallbackEvent = {
+  type: "file:fallback";
+  file: FileContext;
+  timestamp: number;
+};
+
+export type ParseStartEvent = {
+  type: "parse:start";
+  file: FileContext;
+  routeId: string;
+  timestamp: number;
+};
+
+export type ParseEndEvent = {
+  type: "parse:end";
+  file: FileContext;
+  routeId: string;
+  rowCount: number;
+  durationMs: number;
+  timestamp: number;
+};
+
+export type ResolveStartEvent = {
+  type: "resolve:start";
+  file: FileContext;
+  routeId: string;
+  timestamp: number;
+};
+
+export type ResolveEndEvent = {
+  type: "resolve:end";
+  file: FileContext;
+  routeId: string;
+  outputCount: number;
+  durationMs: number;
+  timestamp: number;
+};
+
+export type PipelineErrorEvent = {
+  type: "error";
+  error: PipelineError;
+  timestamp: number;
+};
+
+export type PipelineEvent =
+  | PipelineStartEvent
+  | PipelineEndEvent
+  | VersionStartEvent
+  | VersionEndEvent
+  | ArtifactStartEvent
+  | ArtifactEndEvent
+  | FileMatchedEvent
+  | FileSkippedEvent
+  | FileFallbackEvent
+  | ParseStartEvent
+  | ParseEndEvent
+  | ResolveStartEvent
+  | ResolveEndEvent
+  | PipelineErrorEvent;
+
+export type PipelineErrorScope = "pipeline" | "version" | "file" | "route" | "artifact";
+
+export interface PipelineError {
+  scope: PipelineErrorScope;
+  message: string;
+  error?: unknown;
+  file?: FileContext;
+  routeId?: string;
+  artifactId?: string;
+  version?: string;
+}
+
+export type PipelineGraphNodeType = "source" | "file" | "route" | "artifact" | "output";
+
+export type PipelineGraphNode =
+  | { id: string; type: "source"; version: string }
+  | { id: string; type: "file"; file: FileContext }
+  | { id: string; type: "route"; routeId: string }
+  | { id: string; type: "artifact"; artifactId: string }
+  | { id: string; type: "output"; outputIndex: number; property?: string };
+
+export type PipelineGraphEdgeType = "provides" | "matched" | "parsed" | "resolved" | "uses-artifact";
+
+export interface PipelineGraphEdge {
+  from: string;
+  to: string;
+  type: PipelineGraphEdgeType;
+}
+
+export interface PipelineGraph {
+  nodes: PipelineGraphNode[];
+  edges: PipelineGraphEdge[];
+}
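+
+// Listener sketch: `PipelineEvent` is a discriminated union on `type`, so a
+// switch narrows each member and exposes its per-event fields with full types.
+//
+//   const onEvent = (event: PipelineEvent): void => {
+//     switch (event.type) {
+//       case "file:matched":
+//         console.log(`matched ${event.file.path} -> ${event.routeId}`);
+//         break;
+//       case "parse:end":
+//         console.log(`${event.file.name}: ${event.rowCount} rows in ${event.durationMs.toFixed(1)}ms`);
+//         break;
+//       case "error":
+//         console.error(`[${event.error.scope}] ${event.error.message}`);
+//         break;
+//     }
+//   };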
"uses-artifact"; + +export interface PipelineGraphEdge { + from: string; + to: string; + type: PipelineGraphEdgeType; +} + +export interface PipelineGraph { + nodes: PipelineGraphNode[]; + edges: PipelineGraphEdge[]; +} diff --git a/packages/pipelines/src/filters.ts b/packages/pipelines/src/filters.ts new file mode 100644 index 000000000..63de9c7eb --- /dev/null +++ b/packages/pipelines/src/filters.ts @@ -0,0 +1,58 @@ +import type { FileContext, PipelineFilter } from "./types"; +import picomatch from "picomatch"; + +export function byName(name: string): PipelineFilter { + return (ctx) => ctx.file.name === name; +} + +export function byDir(dir: FileContext["dir"]): PipelineFilter { + return (ctx) => ctx.file.dir === dir; +} + +export function byExt(ext: string): PipelineFilter { + // Handle empty extension case (files without extension like "Makefile") + if (ext === "") { + return (ctx) => ctx.file.ext === ""; + } + const normalizedExt = ext.startsWith(".") ? ext : `.${ext}`; + return (ctx) => ctx.file.ext === normalizedExt; +} + +export function byGlob(pattern: string): PipelineFilter { + const matcher = picomatch(pattern); + return (ctx) => matcher(ctx.file.path); +} + +export function byPath(pathPattern: string | RegExp): PipelineFilter { + if (typeof pathPattern === "string") { + return (ctx) => ctx.file.path === pathPattern; + } + return (ctx) => pathPattern.test(ctx.file.path); +} + +export function byProp(pattern: string | RegExp): PipelineFilter { + if (typeof pattern === "string") { + return (ctx) => ctx.row?.property === pattern; + } + return (ctx) => !!ctx.row?.property && pattern.test(ctx.row.property); +} + +export function and(...filters: PipelineFilter[]): PipelineFilter { + return (ctx) => filters.every((f) => f(ctx)); +} + +export function or(...filters: PipelineFilter[]): PipelineFilter { + return (ctx) => filters.some((f) => f(ctx)); +} + +export function not(filter: PipelineFilter): PipelineFilter { + return (ctx) => !filter(ctx); +} + +export function always(): PipelineFilter { + return () => true; +} + +export function never(): PipelineFilter { + return () => false; +} diff --git a/packages/pipelines/src/index.ts b/packages/pipelines/src/index.ts new file mode 100644 index 000000000..3a46f38b9 --- /dev/null +++ b/packages/pipelines/src/index.ts @@ -0,0 +1,85 @@ +export { + definePipeline, + type FallbackRouteDefinition, + type Pipeline, + type PipelineOptions, +} from "./pipeline"; + +export { + definePipelineArtifact, + type ArtifactBuildContext, + type InferArtifactId, + type InferArtifactsMap, + type InferArtifactValue, + type PipelineArtifactDefinition, +} from "./artifact"; + +export { + definePipelineRoute, + type InferRouteId, + type InferRouteOutput, + type InferRoutesOutput, + type PipelineRouteDefinition, +} from "./route"; + +export { + always, + and, + byDir, + byExt, + byGlob, + byName, + byPath, + byProp, + never, + not, + or, +} from "./filters"; + +export type { + DefaultRange, + FileContext, + FilterContext, + ParseContext, + ParsedRow, + ParserFn, + PipelineFilter, + PipelineSource, + PropertyJson, + ResolvedEntry, + ResolveContext, + ResolverFn, + RouteOutput, + RowContext, +} from "./types"; + +export type { + ArtifactEndEvent, + ArtifactStartEvent, + FileMatchedEvent, + FileSkippedEvent, + FileFallbackEvent, + ParseEndEvent, + ParseStartEvent, + PipelineEndEvent, + PipelineError, + PipelineErrorEvent, + PipelineErrorScope, + PipelineEvent, + PipelineEventType, + PipelineGraph, + PipelineGraphEdge, + PipelineGraphEdgeType, + PipelineGraphNode, + 
diff --git a/packages/pipelines/src/index.ts b/packages/pipelines/src/index.ts
new file mode 100644
index 000000000..3a46f38b9
--- /dev/null
+++ b/packages/pipelines/src/index.ts
@@ -0,0 +1,85 @@
+export {
+  definePipeline,
+  type FallbackRouteDefinition,
+  type Pipeline,
+  type PipelineOptions,
+} from "./pipeline";
+
+export {
+  definePipelineArtifact,
+  type ArtifactBuildContext,
+  type InferArtifactId,
+  type InferArtifactsMap,
+  type InferArtifactValue,
+  type PipelineArtifactDefinition,
+} from "./artifact";
+
+export {
+  definePipelineRoute,
+  type InferRouteId,
+  type InferRouteOutput,
+  type InferRoutesOutput,
+  type PipelineRouteDefinition,
+} from "./route";
+
+export {
+  always,
+  and,
+  byDir,
+  byExt,
+  byGlob,
+  byName,
+  byPath,
+  byProp,
+  never,
+  not,
+  or,
+} from "./filters";
+
+export type {
+  DefaultRange,
+  FileContext,
+  FilterContext,
+  ParseContext,
+  ParsedRow,
+  ParserFn,
+  PipelineFilter,
+  PipelineSource,
+  PropertyJson,
+  ResolvedEntry,
+  ResolveContext,
+  ResolverFn,
+  RouteOutput,
+  RowContext,
+} from "./types";
+
+export type {
+  ArtifactEndEvent,
+  ArtifactStartEvent,
+  FileFallbackEvent,
+  FileMatchedEvent,
+  FileSkippedEvent,
+  ParseEndEvent,
+  ParseStartEvent,
+  PipelineEndEvent,
+  PipelineError,
+  PipelineErrorEvent,
+  PipelineErrorScope,
+  PipelineEvent,
+  PipelineEventType,
+  PipelineGraph,
+  PipelineGraphEdge,
+  PipelineGraphEdgeType,
+  PipelineGraphNode,
+  PipelineGraphNodeType,
+  PipelineStartEvent,
+  ResolveEndEvent,
+  ResolveStartEvent,
+  VersionEndEvent,
+  VersionStartEvent,
+} from "./events";
+
+export type {
+  PipelineRunResult,
+  PipelineSummary,
+} from "./results";
diff --git a/packages/pipelines/src/pipeline.ts b/packages/pipelines/src/pipeline.ts
new file mode 100644
index 000000000..995b59705
--- /dev/null
+++ b/packages/pipelines/src/pipeline.ts
@@ -0,0 +1,584 @@
+import type { InferArtifactsMap, PipelineArtifactDefinition } from "./artifact";
+import type { PipelineEvent, PipelineGraph, PipelineGraphEdge, PipelineGraphNode } from "./events";
+import type { PipelineRunResult, PipelineSummary } from "./results";
+import type { InferRoutesOutput, PipelineRouteDefinition } from "./route";
+import type {
+  FileContext,
+  ParseContext,
+  ParsedRow,
+  PipelineFilter,
+  PipelineSource,
+  ResolvedEntry,
+  ResolveContext,
+} from "./types";
+
+export interface FallbackRouteDefinition<
+  TArtifacts extends Record<string, unknown> = Record<string, unknown>,
+  TOutput = unknown,
+> {
+  filter?: PipelineFilter;
+  parser: (ctx: ParseContext) => AsyncIterable<ParsedRow>;
+  resolver: (ctx: ResolveContext<TArtifacts>, rows: AsyncIterable<ParsedRow>) => Promise<TOutput>;
+}
+
+export interface PipelineOptions<
+  TArtifacts extends readonly PipelineArtifactDefinition[] = readonly PipelineArtifactDefinition[],
+  TRoutes extends readonly PipelineRouteDefinition[] = readonly PipelineRouteDefinition[],
+> {
+  versions: string[];
+  source: PipelineSource;
+  artifacts?: TArtifacts;
+  routes: TRoutes;
+  include?: PipelineFilter;
+  strict?: boolean;
+  concurrency?: number;
+  fallback?: FallbackRouteDefinition<InferArtifactsMap<TArtifacts>>;
+  onEvent?: (event: PipelineEvent) => void | Promise<void>;
+}
+
+export interface Pipeline<TOutput> {
+  run: () => Promise<PipelineRunResult<TOutput>>;
+}
+
+type InferPipelineOutput<
+  TRoutes extends readonly PipelineRouteDefinition[],
+  TFallback extends FallbackRouteDefinition | undefined,
+> = TFallback extends FallbackRouteDefinition<any, infer TFallbackOutput>
+  ? InferRoutesOutput<TRoutes> | TFallbackOutput
+  : InferRoutesOutput<TRoutes>;
+
+export function definePipeline<
+  const TArtifacts extends readonly PipelineArtifactDefinition[],
+  const TRoutes extends readonly PipelineRouteDefinition[],
+  TFallback extends FallbackRouteDefinition<InferArtifactsMap<TArtifacts>, unknown> | undefined = undefined,
+>(
+  options: PipelineOptions<TArtifacts, TRoutes> & { fallback?: TFallback },
+): Pipeline<InferPipelineOutput<TRoutes, TFallback>> {
+  return createPipelineExecutor<InferPipelineOutput<TRoutes, TFallback>>(options);
+}
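+
+// End-to-end sketch (names such as `myUcdSource` and `lineBreakRoute` are
+// illustrative): route ids, outputs and the fallback output type are all
+// inferred from the definitions passed in, so `result.data` is typed.
+//
+//   const pipeline = definePipeline({
+//     versions: ["16.0.0"],
+//     source: myUcdSource,          // any PipelineSource implementation
+//     routes: [lineBreakRoute],     // created with definePipelineRoute
+//     strict: false,
+//     concurrency: 8,
+//     onEvent: (e) => console.log(e.type),
+//   });
+//   const result = await pipeline.run();
+//   console.log(result.summary.totalOutputs, result.errors.length);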
+
+function createPipelineExecutor<TOutput>(
+  options: PipelineOptions,
+): Pipeline<TOutput> {
+  const {
+    versions,
+    source,
+    artifacts = [],
+    routes,
+    include,
+    strict = false,
+    concurrency = 4,
+    fallback,
+    onEvent,
+  } = options;
+
+  async function emit(event: PipelineEvent): Promise<void> {
+    if (onEvent) {
+      await onEvent(event);
+    }
+  }
+
+  async function run(): Promise<PipelineRunResult<TOutput>> {
+    const startTime = performance.now();
+    const graphNodes: PipelineGraphNode[] = [];
+    const graphEdges: PipelineGraphEdge[] = [];
+    const allOutputs: TOutput[] = [];
+    const errors: PipelineRunResult<TOutput>["errors"] = [];
+
+    let totalFiles = 0;
+    let matchedFiles = 0;
+    let skippedFiles = 0;
+    let fallbackFiles = 0;
+
+    await emit({ type: "pipeline:start", versions, timestamp: Date.now() });
+
+    for (const version of versions) {
+      const versionStartTime = performance.now();
+      await emit({ type: "version:start", version, timestamp: Date.now() });
+
+      const sourceNodeId = `source:${version}`;
+      graphNodes.push({ id: sourceNodeId, type: "source", version });
+
+      const artifactsMap: Record<string, unknown> = {};
+
+      for (const artifactDef of artifacts) {
+        const artifactStartTime = performance.now();
+        await emit({
+          type: "artifact:start",
+          artifactId: artifactDef.id,
+          version,
+          timestamp: Date.now(),
+        });
+
+        const artifactNodeId = `artifact:${version}:${artifactDef.id}`;
+        graphNodes.push({ id: artifactNodeId, type: "artifact", artifactId: artifactDef.id });
+        graphEdges.push({ from: sourceNodeId, to: artifactNodeId, type: "provides" });
+
+        try {
+          let rows: AsyncIterable<ParsedRow> | undefined;
+
+          if (artifactDef.filter && artifactDef.parser) {
+            const files = await source.listFiles(version);
+            for (const file of files) {
+              if (artifactDef.filter({ file })) {
+                const parseCtx = createParseContext(file, source);
+                rows = artifactDef.parser(parseCtx);
+                break;
+              }
+            }
+          }
+
+          const value = await artifactDef.build({ version }, rows);
+          artifactsMap[artifactDef.id] = value;
+        } catch (err) {
+          const pipelineError = {
+            scope: "artifact" as const,
+            message: err instanceof Error ? err.message : String(err),
+            error: err,
+            artifactId: artifactDef.id,
+            version,
+          };
+          errors.push(pipelineError);
+          await emit({
+            type: "error",
+            error: pipelineError,
+            timestamp: Date.now(),
+          });
+        }
+
+        await emit({
+          type: "artifact:end",
+          artifactId: artifactDef.id,
+          version,
+          durationMs: performance.now() - artifactStartTime,
+          timestamp: Date.now(),
+        });
+      }
+
+      const files = await source.listFiles(version);
+      totalFiles += files.length;
+
+      const filesToProcess = include
+        ? files.filter((file) => include({ file }))
+        : files;
+
+      const processingQueue = createProcessingQueue(concurrency);
+
+      for (const file of filesToProcess) {
+        await processingQueue.add(async () => {
+          const fileNodeId = `file:${version}:${file.path}`;
+          graphNodes.push({ id: fileNodeId, type: "file", file });
+          graphEdges.push({ from: sourceNodeId, to: fileNodeId, type: "provides" });
+
+          const matchingRoute = routes.find((route) => route.filter({ file }));
+
+          if (matchingRoute) {
+            matchedFiles++;
+            const routeNodeId = `route:${version}:${matchingRoute.id}`;
+
+            if (!graphNodes.some((n) => n.id === routeNodeId)) {
+              graphNodes.push({ id: routeNodeId, type: "route", routeId: matchingRoute.id });
+            }
+
+            graphEdges.push({ from: fileNodeId, to: routeNodeId, type: "matched" });
+
+            await emit({
+              type: "file:matched",
+              file,
+              routeId: matchingRoute.id,
+              timestamp: Date.now(),
+            });
+
+            try {
+              const outputs = await processRoute(
+                file,
+                matchingRoute,
+                artifactsMap,
+                source,
+                version,
+                emit,
+              );
+
+              for (const output of outputs) {
+                const outputIndex = allOutputs.length;
+                allOutputs.push(output as TOutput);
+
+                const outputNodeId = `output:${version}:${outputIndex}`;
+                graphNodes.push({
+                  id: outputNodeId,
+                  type: "output",
+                  outputIndex,
+                  property: (output as { property?: string }).property,
+                });
+                graphEdges.push({ from: routeNodeId, to: outputNodeId, type: "resolved" });
+              }
+            } catch (err) {
+              const pipelineError = {
+                scope: "route" as const,
+                message: err instanceof Error ? err.message : String(err),
+                error: err,
+                file,
+                routeId: matchingRoute.id,
+                version,
+              };
+              errors.push(pipelineError);
+              await emit({
+                type: "error",
+                error: pipelineError,
+                timestamp: Date.now(),
+              });
+            }
+          } else if (fallback) {
+            const shouldUseFallback = !fallback.filter || fallback.filter({ file });
+
+            if (shouldUseFallback) {
+              fallbackFiles++;
+
+              await emit({
+                type: "file:fallback",
+                file,
+                timestamp: Date.now(),
+              });
+
+              try {
+                const outputs = await processFallback(
+                  file,
+                  fallback,
+                  artifactsMap,
+                  source,
+                  version,
+                  emit,
+                );
+
+                for (const output of outputs) {
+                  const outputIndex = allOutputs.length;
+                  allOutputs.push(output as TOutput);
+
+                  const outputNodeId = `output:${version}:${outputIndex}`;
+                  graphNodes.push({
+                    id: outputNodeId,
+                    type: "output",
+                    outputIndex,
+                    property: (output as { property?: string }).property,
+                  });
+                  graphEdges.push({ from: fileNodeId, to: outputNodeId, type: "resolved" });
+                }
+              } catch (err) {
+                const pipelineError = {
+                  scope: "file" as const,
+                  message: err instanceof Error ? err.message : String(err),
+                  error: err,
+                  file,
+                  version,
+                };
+                errors.push(pipelineError);
+                await emit({
+                  type: "error",
+                  error: pipelineError,
+                  timestamp: Date.now(),
+                });
+              }
+            } else {
+              skippedFiles++;
+              await emit({
+                type: "file:skipped",
+                file,
+                reason: "filtered",
+                timestamp: Date.now(),
+              });
+            }
+          } else {
+            skippedFiles++;
+
+            if (strict) {
+              const pipelineError = {
+                scope: "file" as const,
+                message: `No matching route for file: ${file.path}`,
+                file,
+                version,
+              };
+              errors.push(pipelineError);
+              await emit({
+                type: "error",
+                error: pipelineError,
+                timestamp: Date.now(),
+              });
+            } else {
+              await emit({
+                type: "file:skipped",
+                file,
+                reason: "no-match",
+                timestamp: Date.now(),
+              });
+            }
+          }
+        });
+      }
+
+      await processingQueue.drain();
+
+      await emit({
+        type: "version:end",
+        version,
+        durationMs: performance.now() - versionStartTime,
+        timestamp: Date.now(),
+      });
+    }
+
+    const durationMs = performance.now() - startTime;
+
+    await emit({
+      type: "pipeline:end",
+      durationMs,
+      timestamp: Date.now(),
+    });
+
+    const summary: PipelineSummary = {
+      versions,
+      totalFiles,
+      matchedFiles,
+      skippedFiles,
+      fallbackFiles,
+      totalOutputs: allOutputs.length,
+      durationMs,
+    };
+
+    const graph: PipelineGraph = {
+      nodes: graphNodes,
+      edges: graphEdges,
+    };
+
+    return {
+      data: allOutputs,
+      graph,
+      errors,
+      summary,
+    };
+  }
+
+  return { run };
+}
+
+function createParseContext(file: FileContext, source: PipelineSource): ParseContext {
+  let cachedContent: string | null = null;
+
+  return {
+    file,
+    readContent: async () => {
+      if (cachedContent === null) {
+        cachedContent = await source.readFile(file);
+      }
+      return cachedContent;
+    },
+    readLines: async function* () {
+      const content = await source.readFile(file);
+      const lines = content.split(/\r?\n/);
+      for (const line of lines) {
+        yield line;
+      }
+    },
+    isComment: (line: string) => line.startsWith("#") || line.trim() === "",
+  };
+}
+
+function createResolveContext<TArtifacts extends Record<string, unknown>>(
+  version: string,
+  file: FileContext,
+  artifactsMap: TArtifacts,
+): ResolveContext<TArtifacts> {
+  return {
+    version,
+    file,
+    getArtifact: <K extends keyof TArtifacts>(id: K) => artifactsMap[id],
+    normalizeEntries: (entries: ResolvedEntry[]) => {
+      // Compare numerically: lexicographic comparison misorders hex strings of
+      // different lengths (e.g., "10000" would sort after "E000").
+      return entries.sort((a, b) => {
+        const aStart = Number.parseInt(a.range?.split("..")[0] ?? a.codePoint ?? "0", 16);
+        const bStart = Number.parseInt(b.range?.split("..")[0] ?? b.codePoint ?? "0", 16);
+        return aStart - bStart;
+      });
+    },
+    now: () => new Date().toISOString(),
+  };
+}
+
+async function processRoute(
+  file: FileContext,
+  route: PipelineRouteDefinition,
+  artifactsMap: Record<string, unknown>,
+  source: PipelineSource,
+  version: string,
+  emit: (event: PipelineEvent) => Promise<void>,
+): Promise<unknown[]> {
+  const parseStartTime = performance.now();
+  await emit({
+    type: "parse:start",
+    file,
+    routeId: route.id,
+    timestamp: Date.now(),
+  });
+
+  const parseCtx = createParseContext(file, source);
+  const rows = route.parser(parseCtx);
+
+  const collectedRows: ParsedRow[] = [];
+  const filteredRows = filterRows(rows, file, route.filter, collectedRows);
+
+  await emit({
+    type: "parse:end",
+    file,
+    routeId: route.id,
+    rowCount: collectedRows.length,
+    durationMs: performance.now() - parseStartTime,
+    timestamp: Date.now(),
+  });
+
+  const resolveStartTime = performance.now();
+  await emit({
+    type: "resolve:start",
+    file,
+    routeId: route.id,
+    timestamp: Date.now(),
+  });
+
+  const resolveCtx = createResolveContext(version, file, artifactsMap);
+  const outputs = await route.resolver(resolveCtx, filteredRows);
+
+  const outputArray = Array.isArray(outputs) ? outputs : [outputs];
+
+  await emit({
+    type: "resolve:end",
+    file,
+    routeId: route.id,
+    outputCount: outputArray.length,
+    durationMs: performance.now() - resolveStartTime,
+    timestamp: Date.now(),
+  });
+
+  return outputArray;
+}
+
+async function processFallback(
+  file: FileContext,
+  fallback: FallbackRouteDefinition,
+  artifactsMap: Record<string, unknown>,
+  source: PipelineSource,
+  version: string,
+  emit: (event: PipelineEvent) => Promise<void>,
+): Promise<unknown[]> {
+  const parseStartTime = performance.now();
+  await emit({
+    type: "parse:start",
+    file,
+    routeId: "__fallback__",
+    timestamp: Date.now(),
+  });
+
+  const parseCtx = createParseContext(file, source);
+  const rows = fallback.parser(parseCtx);
+
+  const collectedRows: ParsedRow[] = [];
+  const filteredRows = filterRows(rows, file, fallback.filter, collectedRows);
+
+  await emit({
+    type: "parse:end",
+    file,
+    routeId: "__fallback__",
+    rowCount: collectedRows.length,
+    durationMs: performance.now() - parseStartTime,
+    timestamp: Date.now(),
+  });
+
+  const resolveStartTime = performance.now();
+  await emit({
+    type: "resolve:start",
+    file,
+    routeId: "__fallback__",
+    timestamp: Date.now(),
+  });
+
+  const resolveCtx = createResolveContext(version, file, artifactsMap);
+  const outputs = await fallback.resolver(resolveCtx, filteredRows);
+
+  const outputArray = Array.isArray(outputs) ? outputs : [outputs];
+
+  await emit({
+    type: "resolve:end",
+    file,
+    routeId: "__fallback__",
+    outputCount: outputArray.length,
+    durationMs: performance.now() - resolveStartTime,
+    timestamp: Date.now(),
+  });
+
+  return outputArray;
+}
+
+async function* filterRows(
+  rows: AsyncIterable<ParsedRow>,
+  file: FileContext,
+  filter: PipelineFilter | undefined,
+  collector: ParsedRow[],
+): AsyncIterable<ParsedRow> {
+  for await (const row of rows) {
+    collector.push(row);
+
+    if (!filter) {
+      yield row;
+      continue;
+    }
+
+    const shouldInclude = filter({
+      file,
+      row: { property: row.property },
+    });
+
+    if (shouldInclude) {
+      yield row;
+    }
+  }
+}
+
+interface ProcessingQueue {
+  add: (task: () => Promise<void>) => Promise<void>;
+  drain: () => Promise<void>;
+}
+
+function createProcessingQueue(concurrency: number): ProcessingQueue {
+  const queue: (() => Promise<void>)[] = [];
+  let running = 0;
+  let resolveIdle: (() => void) | null = null;
+
+  async function runNext(): Promise<void> {
+    if (running >= concurrency || queue.length === 0) {
+      if (running === 0 && queue.length === 0 && resolveIdle) {
+        resolveIdle();
+      }
+      return;
+    }
+
+    running++;
+    const task = queue.shift()!;
+
+    try {
+      await task();
+    } finally {
+      running--;
+      runNext();
+    }
+  }
+
+  return {
+    add: async (task) => {
+      queue.push(task);
+      runNext();
+    },
+    drain: () => {
+      if (running === 0 && queue.length === 0) {
+        return Promise.resolve();
+      }
+      return new Promise<void>((resolve) => {
+        resolveIdle = resolve;
+      });
+    },
+  };
+}
diff --git a/packages/pipelines/src/results.ts b/packages/pipelines/src/results.ts
new file mode 100644
index 000000000..09db2b28c
--- /dev/null
+++ b/packages/pipelines/src/results.ts
@@ -0,0 +1,18 @@
+import type { PipelineError, PipelineGraph } from "./events";
+
+export interface PipelineSummary {
+  versions: string[];
+  totalFiles: number;
+  matchedFiles: number;
+  skippedFiles: number;
+  fallbackFiles: number;
+  totalOutputs: number;
+  durationMs: number;
+}
+
+export interface PipelineRunResult<TData> {
+  data: TData[];
+  graph: PipelineGraph;
+  errors: PipelineError[];
+  summary: PipelineSummary;
+}
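+
+// Reading a run result (sketch): `data` holds every resolved output across
+// all versions, while `summary` and `errors` support reporting.
+//
+//   const { data, summary, errors } = await pipeline.run();
+//   for (const err of errors) {
+//     console.error(`[${err.scope}] ${err.message}`);
+//   }
+//   console.log(`${summary.matchedFiles}/${summary.totalFiles} files matched`);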
diff --git a/packages/pipelines/src/route.ts b/packages/pipelines/src/route.ts
new file mode 100644
index 000000000..d310ef8f4
--- /dev/null
+++ b/packages/pipelines/src/route.ts
@@ -0,0 +1,38 @@
+import type {
+  ParserFn,
+  ParsedRow,
+  PipelineFilter,
+  PropertyJson,
+  ResolveContext,
+  RouteOutput,
+} from "./types";
+
+export interface PipelineRouteDefinition<
+  TId extends string = string,
+  TArtifacts extends Record<string, unknown> = Record<string, unknown>,
+  TOutput = PropertyJson[],
+> {
+  id: TId;
+  filter: PipelineFilter;
+  parser: ParserFn;
+  resolver: (ctx: ResolveContext<TArtifacts>, rows: AsyncIterable<ParsedRow>) => Promise<TOutput>;
+  out?: RouteOutput;
+}
+
+export function definePipelineRoute<
+  const TId extends string,
+  TArtifacts extends Record<string, unknown> = Record<string, unknown>,
+  TOutput = PropertyJson[],
+>(
+  definition: PipelineRouteDefinition<TId, TArtifacts, TOutput>,
+): PipelineRouteDefinition<TId, TArtifacts, TOutput> {
+  return definition;
+}
+
+export type InferRouteId<T> = T extends PipelineRouteDefinition<infer TId, Record<string, unknown>, unknown> ? TId : never;
+export type InferRouteOutput<T> = T extends PipelineRouteDefinition<string, Record<string, unknown>, infer TOutput> ? TOutput : never;
+
+export type InferRoutesOutput<T extends readonly PipelineRouteDefinition[]> =
+  T[number] extends PipelineRouteDefinition<string, Record<string, unknown>, infer TOutput>
+    ? TOutput extends unknown[] ? TOutput[number] : TOutput
+    : never;
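+
+// Route sketch (the file name and semicolon-delimited line format are
+// illustrative): a route pairs a file filter with a parser and a resolver
+// that produces PropertyJson outputs.
+//
+//   const lineBreakRoute = definePipelineRoute({
+//     id: "line-break",
+//     filter: byName("LineBreak.txt"),
+//     parser: async function* (ctx) {
+//       for await (const line of ctx.readLines()) {
+//         if (ctx.isComment(line)) continue;
+//         const [range = "", value = ""] = line.split(";").map((s) => s.trim());
+//         const [start = "", end = start] = range.split("..");
+//         yield { sourceFile: ctx.file.path, kind: "range", start, end, value };
+//       }
+//     },
+//     resolver: async (ctx, rows) => {
+//       const entries: ResolvedEntry[] = [];
+//       for await (const row of rows) {
+//         entries.push({ range: `${row.start}..${row.end}`, value: row.value ?? "" });
+//       }
+//       return [{
+//         version: ctx.version,
+//         property: "Line_Break",
+//         file: ctx.file.name,
+//         entries: ctx.normalizeEntries(entries),
+//       }];
+//     },
+//   });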
diff --git a/packages/pipelines/src/types.ts b/packages/pipelines/src/types.ts
new file mode 100644
index 000000000..d7a73f563
--- /dev/null
+++ b/packages/pipelines/src/types.ts
@@ -0,0 +1,289 @@
+/**
+ * Represents the context of a file being processed in the pipeline.
+ */
+export interface FileContext {
+  /**
+   * The Unicode version being processed (e.g., "16.0.0").
+   */
+  version: string;
+
+  /**
+   * The directory category of the file.
+   */
+  dir: "ucd" | "extracted" | "auxiliary" | "emoji" | "unihan" | string;
+
+  /**
+   * The relative path from the version root (e.g., "ucd/LineBreak.txt").
+   */
+  path: string;
+
+  /**
+   * The file name (e.g., "LineBreak.txt").
+   */
+  name: string;
+
+  /**
+   * The file extension (e.g., ".txt").
+   */
+  ext: string;
+}
+
+/**
+ * Context for a specific row/line within a file.
+ * Used during row-level filtering in multi-property files.
+ */
+export interface RowContext {
+  /**
+   * The property name for multi-property files (e.g., "NFKC_Casefold").
+   */
+  property?: string;
+}
+
+/**
+ * Combined context passed to filter predicates.
+ * During file routing, only `file` is defined.
+ * During row filtering, both `file` and `row` are defined.
+ */
+export interface FilterContext {
+  /**
+   * The file context.
+   */
+  file: FileContext;
+
+  /**
+   * The row context (only defined during row-level filtering).
+   */
+  row?: RowContext;
+}
+
+/**
+ * A predicate function that determines if a file or row should be processed.
+ */
+export type PipelineFilter = (ctx: FilterContext) => boolean;
+
+/**
+ * A parsed row from a UCD file.
+ */
+export interface ParsedRow {
+  /**
+   * The source file path relative to the version root.
+   */
+  sourceFile: string;
+
+  /**
+   * The kind of entry.
+   */
+  kind: "range" | "point" | "sequence" | "alias";
+
+  /**
+   * Start of range (hex string, e.g., "0041").
+   */
+  start?: string;
+
+  /**
+   * End of range (hex string, e.g., "005A").
+   */
+  end?: string;
+
+  /**
+   * Single code point (hex string).
+   */
+  codePoint?: string;
+
+  /**
+   * Sequence of code points (hex strings).
+   */
+  sequence?: string[];
+
+  /**
+   * Property name for multi-property files.
+   */
+  property?: string;
+
+  /**
+   * The value(s) associated with this entry.
+   */
+  value?: string | string[];
+
+  /**
+   * Additional metadata (comments, line numbers, etc.).
+   */
+  meta?: Record<string, unknown>;
+}
+
+/**
+ * Context passed to parser functions.
+ */
+export interface ParseContext {
+  /**
+   * The file being parsed.
+   */
+  file: FileContext;
+
+  /**
+   * Read the raw content of the file.
+   */
+  readContent: () => Promise<string>;
+
+  /**
+   * Read the file line by line.
+   */
+  readLines: () => AsyncIterable<string>;
+
+  /**
+   * Check if a line is a comment.
+   */
+  isComment: (line: string) => boolean;
+}
+
+/**
+ * A parser function that converts file content to parsed rows.
+ */
+export type ParserFn = (ctx: ParseContext) => AsyncIterable<ParsedRow>;
+
+/**
+ * A resolved entry in the output JSON.
+ */
+export interface ResolvedEntry {
+  /**
+   * Range in "XXXX..YYYY" format (hex, inclusive).
+   */
+  range?: `${string}..${string}`;
+
+  /**
+   * Single code point in hex.
+   */
+  codePoint?: string;
+
+  /**
+   * Sequence of code points.
+   */
+  sequence?: string[];
+
+  /**
+   * The value(s) for this entry.
+   */
+  value: string | string[];
+}
+
+/**
+ * A default range from @missing declarations.
+ */
+export interface DefaultRange {
+  /**
+   * The range this default applies to.
+   */
+  range: `${string}..${string}`;
+
+  /**
+   * The default value.
+   */
+  value: string | string[];
+}
+
+/**
+ * The standardized JSON output for a property.
+ */
+export interface PropertyJson {
+  /**
+   * The Unicode version (e.g., "16.0.0").
+   */
+  version: string;
+
+  /**
+   * The property name (e.g., "Line_Break").
+   */
+  property: string;
+
+  /**
+   * The source file name (e.g., "LineBreak.txt").
+   */
+  file: string;
+
+  /**
+   * The resolved entries.
+   */
+  entries: ResolvedEntry[];
+
+  /**
+   * Default ranges from @missing (in encounter order).
+   */
+  defaults?: DefaultRange[];
+
+  /**
+   * Additional metadata.
+   */
+  meta?: Record<string, unknown>;
+}
+
+/**
+ * Context passed to resolver functions.
+ */
+export interface ResolveContext<TArtifacts extends Record<string, unknown> = Record<string, unknown>> {
+  /**
+   * The Unicode version being processed.
+   */
+  version: string;
+
+  /**
+   * The file being resolved.
+   */
+  file: FileContext;
+
+  /**
+   * Get a previously built artifact by ID.
+   */
+  getArtifact: <K extends keyof TArtifacts>(id: K) => TArtifacts[K] | undefined;
+
+  /**
+   * Normalize entries (sort, merge adjacent equal-value ranges, canonicalize hex).
+   */
+  normalizeEntries: (entries: ResolvedEntry[]) => ResolvedEntry[];
+
+  /**
+   * Get the current ISO timestamp.
+   */
+  now: () => string;
+}
+
+/**
+ * A resolver function that converts parsed rows to property JSON.
+ */
+export type ResolverFn<
+  TArtifacts extends Record<string, unknown> = Record<string, unknown>,
+  TOutput = PropertyJson[],
+> = (
+  ctx: ResolveContext<TArtifacts>,
+  rows: AsyncIterable<ParsedRow>,
+) => Promise<TOutput>;
+
+/**
+ * Source adapter for providing files to the pipeline.
+ * This abstraction allows different backends (ucd-store, HTTP, memory, etc.).
+ */
+export interface PipelineSource {
+  /**
+   * List all files for a given version.
+   */
+  listFiles: (version: string) => Promise<FileContext[]>;
+
+  /**
+   * Read the content of a file.
+   */
+  readFile: (file: FileContext) => Promise<string>;
+}
+
+/**
+ * Output configuration for a route.
+ */
+export interface RouteOutput {
+  /**
+   * Custom output directory.
+   */
+  dir?: string;
+
+  /**
+   * Custom file name generator.
+   */
+  fileName?: (pj: PropertyJson) => string;
+}
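+
+// In-memory PipelineSource sketch, mirroring the adapter the tests below
+// build; a real adapter could read from ucd-store or HTTP instead. `files`
+// is an assumed Record<version, Record<path, content>> map.
+//
+//   const source: PipelineSource = {
+//     listFiles: async (version) =>
+//       Object.keys(files[version] ?? {}).map((path) => ({
+//         version,
+//         path,
+//         name: path.split("/").pop() ?? path,
+//         dir: path.includes("/") ? path.slice(0, path.lastIndexOf("/")) : "",
+//         ext: path.includes(".") ? path.slice(path.lastIndexOf(".")) : "",
+//       })),
+//     readFile: async (file) => files[file.version]?.[file.path] ?? "",
+//   };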
diff --git a/packages/pipelines/test/artifact.test.ts b/packages/pipelines/test/artifact.test.ts
new file mode 100644
index 000000000..5e68b120e
--- /dev/null
+++ b/packages/pipelines/test/artifact.test.ts
@@ -0,0 +1,386 @@
+import type { FileContext, ParseContext, ParsedRow } from "../src/types";
+import { describe, expect, expectTypeOf, it } from "vitest";
+import {
+  definePipelineArtifact,
+  type InferArtifactId,
+  type InferArtifactsMap,
+  type InferArtifactValue,
+  type PipelineArtifactDefinition,
+} from "../src/artifact";
+import { byName } from "../src/filters";
+import { definePipeline } from "../src/pipeline";
+import { definePipelineRoute } from "../src/route";
+
+function createMockSource(files: Record<string, Record<string, string>>) {
+  return {
+    listFiles: async (version: string): Promise<FileContext[]> => {
+      const versionFiles = files[version] ?? {};
+      return Object.keys(versionFiles).map((path) => ({
+        path,
+        name: path.split("/").pop() ?? path,
+        dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "",
+        ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "",
+        version,
+      }));
+    },
+    readFile: async (file: FileContext): Promise<string> => {
+      const versionFiles = files[file.version] ?? {};
+      return versionFiles[file.path] ?? "";
+    },
+  };
+}
+
+function createRow(ctx: ParseContext, props: Partial<ParsedRow>): ParsedRow {
+  return {
+    sourceFile: ctx.file.path,
+    kind: props.codePoint ? "point" : "range",
+    ...props,
+  };
+}
+
+describe("definePipelineArtifact", () => {
+  it("should create an artifact definition with id and build function", () => {
+    const artifact = definePipelineArtifact({
+      id: "test-artifact",
+      build: async () => ({ value: 42 }),
+    });
+
+    expect(artifact.id).toBe("test-artifact");
+    expect(typeof artifact.build).toBe("function");
+  });
+
+  it("should preserve the artifact id as a literal type", () => {
+    const artifact = definePipelineArtifact({
+      id: "my-specific-id",
+      build: async () => "result",
+    });
+
+    expectTypeOf(artifact.id).toEqualTypeOf<"my-specific-id">();
+  });
+
+  it("should infer the build return type", () => {
+    const mapArtifact = definePipelineArtifact({
+      id: "map-artifact",
+      build: async () => new Map<string, number>(),
+    });
+
+    const setArtifact = definePipelineArtifact({
+      id: "set-artifact",
+      build: async () => new Set<string>(),
+    });
+
+    const objectArtifact = definePipelineArtifact({
+      id: "object-artifact",
+      build: async () => ({ count: 0, names: ["a", "b"] }),
+    });
+
+    expectTypeOf<InferArtifactValue<typeof mapArtifact>>().toEqualTypeOf<Map<string, number>>();
+    expectTypeOf<InferArtifactValue<typeof setArtifact>>().toEqualTypeOf<Set<string>>();
+    expectTypeOf<InferArtifactValue<typeof objectArtifact>>().toEqualTypeOf<{ count: number; names: string[] }>();
+  });
+
+  it("should support optional filter and parser", () => {
+    const artifact = definePipelineArtifact({
+      id: "with-parser",
+      filter: byName("PropertyValueAliases.txt"),
+      parser: async function* (ctx) {
+        for await (const line of ctx.readLines()) {
+          if (!ctx.isComment(line)) {
+            yield createRow(ctx, { property: line, value: line });
+          }
+        }
+      },
+      build: async (_ctx, rows) => {
+        const aliases = new Map<string, string[]>();
+        if (rows) {
+          for await (const row of rows) {
+            if (row.property) {
+              aliases.set(row.property, [row.value as string]);
+            }
+          }
+        }
+        return aliases;
+      },
+    });
+
+    expect(artifact.filter).toBeDefined();
+    expect(artifact.parser).toBeDefined();
+    expectTypeOf<InferArtifactValue<typeof artifact>>().toEqualTypeOf<Map<string, string[]>>();
+  });
+
+  it("should receive version in build context", async () => {
+    let receivedVersion: string | undefined;
+
+    const artifact = definePipelineArtifact({
+      id: "version-check",
+      build: async (ctx) => {
+        receivedVersion = ctx.version;
+        return ctx.version;
+      },
+    });
+
+    const route = definePipelineRoute({
+      id: "dummy",
+      filter: byName("test.txt"),
+      parser: async function* (ctx) {
+        yield createRow(ctx, { codePoint: "0000", value: "x" });
+      },
+      resolver: async (ctx) => [
+        { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] },
+      ],
+    });
+
+    const pipeline = definePipeline({
+      versions: ["16.0.0"],
+      source: createMockSource({ "16.0.0": { "test.txt": "content" } }),
+      artifacts: [artifact],
+      routes: [route],
+    });
+
+    await pipeline.run();
+
+    expect(receivedVersion).toBe("16.0.0");
+  });
+
+  it("should build artifacts before routes execute", async () => {
+    const executionOrder: string[] = [];
+
+    const artifact = definePipelineArtifact({
+      id: "first",
+      build: async () => {
+        executionOrder.push("artifact:build");
+        return 42;
+      },
+    });
+
+    const route = definePipelineRoute({
+      id: "second",
+      filter: byName("test.txt"),
+      parser: async function* (ctx) {
+        executionOrder.push("route:parse");
+        yield createRow(ctx, { codePoint: "0000", value: "x" });
+      },
+      resolver: async (ctx, rows) => {
+        for await (const _row of rows) {
+          // consume rows to trigger parser
+        }
+        executionOrder.push("route:resolve");
+        return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }];
+      },
+    });
+
+    const pipeline = definePipeline({
+      versions: ["16.0.0"],
+      source: createMockSource({ "16.0.0": { "test.txt": "content" } }),
+      artifacts: [artifact],
+      routes: [route],
+    });
+
+    await pipeline.run();
+
+    expect(executionOrder[0]).toBe("artifact:build");
+    expect(executionOrder).toContain("route:parse");
+    expect(executionOrder).toContain("route:resolve");
+  });
+
+  it("should make artifact values available in resolver via getArtifact", async () => {
+    const aliasArtifact = definePipelineArtifact({
+      id: "aliases",
+      build: async () => new Map([["A", "LATIN CAPITAL LETTER A"]]),
+    });
+
+    let retrievedValue: Map<string, string> | undefined;
+
+    const route = definePipelineRoute({
+      id: "consumer",
+      filter: byName("test.txt"),
+      parser: async function* (ctx) {
+        yield createRow(ctx, { codePoint: "0041", property: "A", value: "A" });
+      },
+      resolver: async (ctx) => {
+        retrievedValue = ctx.getArtifact("aliases") as Map<string, string> | undefined;
+        return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }];
+      },
+    });
+
+    const pipeline = definePipeline({
+      versions: ["16.0.0"],
+      source: createMockSource({ "16.0.0": { "test.txt": "content" } }),
+      artifacts: [aliasArtifact],
+      routes: [route],
+    });
+
+    await pipeline.run();
+
+    expect(retrievedValue).toBeInstanceOf(Map);
+    expect(retrievedValue?.get("A")).toBe("LATIN CAPITAL LETTER A");
+  });
+
+  it("should rebuild artifacts for each version", async () => {
+    const buildCalls: string[] = [];
+
+    const artifact = definePipelineArtifact({
+      id: "per-version",
+      build: async (ctx) => {
+        buildCalls.push(ctx.version);
+        return ctx.version;
+      },
+    });
+
+    const route = definePipelineRoute({
+      id: "dummy",
+      filter: byName("test.txt"),
+      parser: async function* (ctx) {
+        yield createRow(ctx, { codePoint: "0000", value: "x" });
+      },
+      resolver: async (ctx) => [
+        { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] },
+      ],
+    });
+
+    const pipeline = definePipeline({
+      versions: ["16.0.0", "15.1.0", "14.0.0"],
+      source: createMockSource({
+        "16.0.0": { "test.txt": "a" },
+        "15.1.0": { "test.txt": "b" },
+        "14.0.0": { "test.txt": "c" },
+      }),
+      artifacts: [artifact],
+      routes: [route],
+    });
+
+    await pipeline.run();
+
+    expect(buildCalls).toEqual(["16.0.0", "15.1.0", "14.0.0"]);
+  });
+
+  it("should emit artifact events", async () => {
+    const events: string[] = [];
+
+    const artifact = definePipelineArtifact({
+      id: "event-test",
+      build: async () => "value",
+    });
+
+    const route = definePipelineRoute({
+      id: "dummy",
+      filter: byName("test.txt"),
+      parser: async function* (ctx) {
+        yield createRow(ctx, { codePoint: "0000", value: "x" });
+      },
+      resolver: async (ctx) => [
+        { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] },
+      ],
+    });
+
+    const pipeline = definePipeline({
+      versions: ["16.0.0"],
+      source: createMockSource({ "16.0.0": { "test.txt": "content" } }),
+      artifacts: [artifact],
+      routes: [route],
+      onEvent: (event) => {
+        if (event.type.startsWith("artifact:")) {
+          events.push(event.type);
+        }
+      },
+    });
+
+    await pipeline.run();
+
+    expect(events).toContain("artifact:start");
+    expect(events).toContain("artifact:end");
+  });
+
+  it("should handle artifact build errors", async () => {
+    const artifact = definePipelineArtifact({
+      id: "failing",
+      build: async () => {
+        throw new Error("Artifact build failed");
+      },
+    });
+
+    const route = definePipelineRoute({
+      id: "dummy",
+      filter: byName("test.txt"),
+      parser: async function* (ctx) {
+        yield createRow(ctx, { codePoint: "0000", value: "x" });
+      },
+      resolver: async (ctx) => [
+        { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] },
+      ],
+    });
+
+    const pipeline = definePipeline({
+      versions: ["16.0.0"],
+      source: createMockSource({ "16.0.0": { "test.txt": "content" } }),
+      artifacts: [artifact],
+      routes: [route],
+    });
+
+    const result = await pipeline.run();
+
+    expect(result.errors).toHaveLength(1);
+    expect(result.errors[0]!.scope).toBe("artifact");
+    expect(result.errors[0]!.artifactId).toBe("failing");
+    expect(result.errors[0]!.message).toBe("Artifact build failed");
+  });
+});
+
+describe("artifact type inference", () => {
+  it("should infer artifact id type", () => {
+    const artifact = definePipelineArtifact({
+      id: "specific-id",
+      build: async () => 42,
+    });
+
+    type Id = InferArtifactId<typeof artifact>;
+    expectTypeOf<Id>().toEqualTypeOf<"specific-id">();
+  });
+
+  it("should infer artifact value type", () => {
+    const artifact = definePipelineArtifact({
+      id: "typed-value",
+      build: async () => ({ nested: { deep: true }, array: [1, 2, 3] }),
+    });
+
+    type Value = InferArtifactValue<typeof artifact>;
+    expectTypeOf<Value>().toEqualTypeOf<{ nested: { deep: boolean }; array: number[] }>();
+  });
+
+  it("should infer artifacts map from array of artifacts", () => {
+    const aliasArtifact = definePipelineArtifact({
+      id: "aliases",
+      build: async () => new Map<string, string>(),
+    });
+
+    const countArtifact = definePipelineArtifact({
+      id: "count",
+      build: async () => 42,
+    });
+
+    const configArtifact = definePipelineArtifact({
+      id: "config",
+      build: async () => ({ enabled: true, threshold: 0.5 }),
+    });
+
+    type ArtifactsMap = InferArtifactsMap<[typeof aliasArtifact, typeof countArtifact, typeof configArtifact]>;
+
+    expectTypeOf<ArtifactsMap>().toEqualTypeOf<{
+      aliases: Map<string, string>;
+      count: number;
+      config: { enabled: boolean; threshold: number };
+    }>();
+  });
+
+  it("should type PipelineArtifactDefinition with generics", () => {
+    type MapArtifact = PipelineArtifactDefinition<"map-id", Map<string, number>>;
+
+    const artifact: MapArtifact = {
+      id: "map-id",
+      build: async () => new Map<string, number>(),
+    };
+
+    expectTypeOf(artifact.id).toEqualTypeOf<"map-id">();
+    expectTypeOf(artifact.build).returns.resolves.toEqualTypeOf<Map<string, number>>();
+  });
+});
diff --git a/packages/pipelines/test/events.test.ts b/packages/pipelines/test/events.test.ts
new file mode 100644
index 000000000..f31edf6c1
--- /dev/null
+++ b/packages/pipelines/test/events.test.ts
@@ -0,0 +1,823 @@
+import type { FileContext, ParseContext, ParsedRow, PropertyJson } from "../src/types";
+import type {
+  ArtifactEndEvent,
+  ArtifactStartEvent,
+  FileMatchedEvent,
+  FileSkippedEvent,
+  ParseEndEvent,
+  ParseStartEvent,
+  PipelineEndEvent,
+  PipelineError,
+  PipelineErrorEvent,
+  PipelineEvent,
+  PipelineGraph,
+  PipelineGraphEdge,
+  PipelineGraphNode,
+  PipelineStartEvent,
+  ResolveEndEvent,
+  ResolveStartEvent,
+  VersionEndEvent,
+  VersionStartEvent,
+} from "../src/events";
+import { describe, expect, expectTypeOf, it } from "vitest";
+import { byName } from "../src/filters";
+import { definePipeline } from "../src/pipeline";
+import { definePipelineArtifact } from "../src/artifact";
+import { definePipelineRoute } from "../src/route";
+
+function createMockSource(files: Record<string, Record<string, string>>) {
+  return {
+    listFiles: async (version: string): Promise<FileContext[]> => {
+      const versionFiles = files[version] ?? {};
+      return Object.keys(versionFiles).map((path) => ({
+        path,
+        name: path.split("/").pop() ?? path,
+        dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "",
+        ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "",
+        version,
+      }));
+    },
+    readFile: async (file: FileContext): Promise<string> => {
+      const versionFiles = files[file.version] ?? {};
+      return versionFiles[file.path] ?? "";
+    },
+  };
+}
+
+function createRow(ctx: ParseContext, props: Partial<ParsedRow>): ParsedRow {
+  return {
+    sourceFile: ctx.file.path,
+    kind: props.codePoint ? "point" : "range",
+    ...props,
+  };
+}
+
+describe("PipelineEvent types", () => {
+  it("should have correct PipelineStartEvent structure", () => {
+    const event: PipelineStartEvent = {
+      type: "pipeline:start",
+      versions: ["16.0.0"],
+      timestamp: Date.now(),
+    };
+
+    expectTypeOf(event.type).toEqualTypeOf<"pipeline:start">();
+    expectTypeOf(event.versions).toEqualTypeOf<string[]>();
+    expectTypeOf(event.timestamp).toEqualTypeOf<number>();
+  });
+
+  it("should have correct PipelineEndEvent structure", () => {
+    const event: PipelineEndEvent = {
+      type: "pipeline:end",
+      durationMs: 100,
+      timestamp: Date.now(),
+    };
+
+    expectTypeOf(event.type).toEqualTypeOf<"pipeline:end">();
+    expectTypeOf(event.durationMs).toEqualTypeOf<number>();
+    expectTypeOf(event.timestamp).toEqualTypeOf<number>();
+  });
+
+  it("should have correct VersionStartEvent structure", () => {
+    const event: VersionStartEvent = {
+      type: "version:start",
+      version: "16.0.0",
+      timestamp: Date.now(),
+    };
+
+    expectTypeOf(event.type).toEqualTypeOf<"version:start">();
+    expectTypeOf(event.version).toEqualTypeOf<string>();
+  });
+
+  it("should have correct VersionEndEvent structure", () => {
+    const event: VersionEndEvent = {
+      type: "version:end",
+      version: "16.0.0",
+      durationMs: 100,
+      timestamp: Date.now(),
+    };
+
+    expectTypeOf(event.type).toEqualTypeOf<"version:end">();
+    expectTypeOf(event.durationMs).toEqualTypeOf<number>();
+  });
+
+  it("should have correct ArtifactStartEvent structure", () => {
+    const event: ArtifactStartEvent = {
+      type: "artifact:start",
+      artifactId: "names",
+      version: "16.0.0",
+      timestamp: Date.now(),
+    };
+
+    expectTypeOf(event.type).toEqualTypeOf<"artifact:start">();
+    expectTypeOf(event.artifactId).toEqualTypeOf<string>();
+    expectTypeOf(event.version).toEqualTypeOf<string>();
+  });
artifactId: "names", + version: "16.0.0", + durationMs: 50, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"artifact:end">(); + expectTypeOf(event.durationMs).toEqualTypeOf(); + }); + + it("should have correct FileMatchedEvent structure", () => { + const file: FileContext = { + path: "LineBreak.txt", + name: "LineBreak.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: FileMatchedEvent = { + type: "file:matched", + file, + routeId: "line-break", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"file:matched">(); + expectTypeOf(event.file).toEqualTypeOf(); + expectTypeOf(event.routeId).toEqualTypeOf(); + }); + + it("should have correct FileSkippedEvent structure", () => { + const file: FileContext = { + path: "Unknown.txt", + name: "Unknown.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: FileSkippedEvent = { + type: "file:skipped", + file, + reason: "no-match", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"file:skipped">(); + expectTypeOf(event.reason).toEqualTypeOf<"no-match" | "filtered">(); + }); + + it("should have correct ParseStartEvent structure", () => { + const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: ParseStartEvent = { + type: "parse:start", + file, + routeId: "test-route", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"parse:start">(); + expectTypeOf(event.file).toEqualTypeOf(); + expectTypeOf(event.routeId).toEqualTypeOf(); + }); + + it("should have correct ParseEndEvent structure", () => { + const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: ParseEndEvent = { + type: "parse:end", + file, + routeId: "test-route", + rowCount: 100, + durationMs: 25, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"parse:end">(); + expectTypeOf(event.rowCount).toEqualTypeOf(); + expectTypeOf(event.durationMs).toEqualTypeOf(); + }); + + it("should have correct ResolveStartEvent structure", () => { + const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: ResolveStartEvent = { + type: "resolve:start", + file, + routeId: "test-route", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"resolve:start">(); + }); + + it("should have correct ResolveEndEvent structure", () => { + const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: ResolveEndEvent = { + type: "resolve:end", + file, + routeId: "test-route", + outputCount: 5, + durationMs: 10, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"resolve:end">(); + expectTypeOf(event.outputCount).toEqualTypeOf(); + }); + + it("should have correct PipelineErrorEvent structure", () => { + const error: PipelineError = { + scope: "route", + message: "Something went wrong", + routeId: "test-route", + version: "16.0.0", + }; + + const event: PipelineErrorEvent = { + type: "error", + error, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"error">(); + expectTypeOf(event.error).toEqualTypeOf(); + }); +}); + +describe("PipelineError", () => { + it("should support all error scopes", () => { + const scopes: PipelineError["scope"][] = [ + "pipeline", + "version", + "file", + "route", + 
"artifact", + ]; + + expect(scopes).toHaveLength(5); + }); + + it("should have optional file context", () => { + const errorWithFile: PipelineError = { + scope: "file", + message: "File error", + file: { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }, + }; + + const errorWithoutFile: PipelineError = { + scope: "pipeline", + message: "Pipeline error", + }; + + expect(errorWithFile.file).toBeDefined(); + expect(errorWithoutFile.file).toBeUndefined(); + }); + + it("should have optional routeId", () => { + const error: PipelineError = { + scope: "route", + message: "Route error", + routeId: "test-route", + }; + + expectTypeOf(error.routeId).toEqualTypeOf(); + }); + + it("should have optional artifactId", () => { + const error: PipelineError = { + scope: "artifact", + message: "Artifact error", + artifactId: "names", + }; + + expectTypeOf(error.artifactId).toEqualTypeOf(); + }); +}); + +describe("PipelineGraph", () => { + it("should have nodes and edges", () => { + const graph: PipelineGraph = { + nodes: [], + edges: [], + }; + + expectTypeOf(graph.nodes).toEqualTypeOf(); + expectTypeOf(graph.edges).toEqualTypeOf(); + }); + + it("should support source node type", () => { + const node: PipelineGraphNode = { + id: "source:16.0.0", + type: "source", + version: "16.0.0", + }; + + expect(node.type).toBe("source"); + expect(node.version).toBe("16.0.0"); + }); + + it("should support file node type", () => { + const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const node: PipelineGraphNode = { + id: "file:16.0.0:test.txt", + type: "file", + file, + }; + + expect(node.type).toBe("file"); + expect(node.file).toEqual(file); + }); + + it("should support route node type", () => { + const node: PipelineGraphNode = { + id: "route:16.0.0:line-break", + type: "route", + routeId: "line-break", + }; + + expect(node.type).toBe("route"); + expect(node.routeId).toBe("line-break"); + }); + + it("should support artifact node type", () => { + const node: PipelineGraphNode = { + id: "artifact:16.0.0:names", + type: "artifact", + artifactId: "names", + }; + + expect(node.type).toBe("artifact"); + expect(node.artifactId).toBe("names"); + }); + + it("should support output node type with optional property", () => { + const nodeWithProperty: PipelineGraphNode = { + id: "output:16.0.0:0", + type: "output", + outputIndex: 0, + property: "Line_Break", + }; + + const nodeWithoutProperty: PipelineGraphNode = { + id: "output:16.0.0:1", + type: "output", + outputIndex: 1, + }; + + expect(nodeWithProperty.property).toBe("Line_Break"); + expect(nodeWithoutProperty.property).toBeUndefined(); + }); + + it("should support all edge types", () => { + const edgeTypes: PipelineGraphEdge["type"][] = [ + "provides", + "matched", + "parsed", + "resolved", + "uses-artifact", + ]; + + expect(edgeTypes).toHaveLength(5); + }); + + it("should have from and to on edges", () => { + const edge: PipelineGraphEdge = { + from: "source:16.0.0", + to: "file:16.0.0:test.txt", + type: "provides", + }; + + expectTypeOf(edge.from).toEqualTypeOf(); + expectTypeOf(edge.to).toEqualTypeOf(); + expectTypeOf(edge.type).toEqualTypeOf<"provides" | "matched" | "parsed" | "resolved" | "uses-artifact">(); + }); +}); + +describe("Event emission during pipeline run", () => { + it("should emit pipeline:start and pipeline:end events", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: 
byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const startEvent = events.find((e) => e.type === "pipeline:start"); + const endEvent = events.find((e) => e.type === "pipeline:end"); + + expect(startEvent).toBeDefined(); + expect(endEvent).toBeDefined(); + expect(startEvent?.type).toBe("pipeline:start"); + expect(endEvent?.type).toBe("pipeline:end"); + }); + + it("should emit version:start and version:end for each version", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + source: createMockSource({ + "16.0.0": { "test.txt": "content" }, + "15.1.0": { "test.txt": "content" }, + }), + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const versionStarts = events.filter((e) => e.type === "version:start"); + const versionEnds = events.filter((e) => e.type === "version:end"); + + expect(versionStarts).toHaveLength(2); + expect(versionEnds).toHaveLength(2); + }); + + it("should emit artifact events when artifacts are defined", async () => { + const events: PipelineEvent[] = []; + + const artifact = definePipelineArtifact({ + id: "names", + build: async () => new Map([["0041", "LATIN CAPITAL LETTER A"]]), + }); + + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + artifacts: [artifact], + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const artifactStart = events.find((e) => e.type === "artifact:start"); + const artifactEnd = events.find((e) => e.type === "artifact:end"); + + expect(artifactStart).toBeDefined(); + expect(artifactEnd).toBeDefined(); + if (artifactStart?.type === "artifact:start") { + expect(artifactStart.artifactId).toBe("names"); + } + }); + + it("should emit file:matched for matched files", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + 
versions: ["16.0.0"], + source: createMockSource({ + "16.0.0": { + "matched.txt": "content", + "unmatched.txt": "content", + }, + }), + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const matchedEvents = events.filter((e) => e.type === "file:matched"); + expect(matchedEvents).toHaveLength(1); + + if (matchedEvents[0]?.type === "file:matched") { + expect(matchedEvents[0].file.name).toBe("matched.txt"); + expect(matchedEvents[0].routeId).toBe("test"); + } + }); + + it("should emit file:skipped for unmatched files", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ + "16.0.0": { + "matched.txt": "content", + "unmatched.txt": "content", + }, + }), + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const skippedEvents = events.filter((e) => e.type === "file:skipped"); + expect(skippedEvents).toHaveLength(1); + + if (skippedEvents[0]?.type === "file:skipped") { + expect(skippedEvents[0].file.name).toBe("unmatched.txt"); + expect(skippedEvents[0].reason).toBe("no-match"); + } + }); + + it("should emit parse and resolve events", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const parseStart = events.find((e) => e.type === "parse:start"); + const parseEnd = events.find((e) => e.type === "parse:end"); + const resolveStart = events.find((e) => e.type === "resolve:start"); + const resolveEnd = events.find((e) => e.type === "resolve:end"); + + expect(parseStart).toBeDefined(); + expect(parseEnd).toBeDefined(); + expect(resolveStart).toBeDefined(); + expect(resolveEnd).toBeDefined(); + }); + + it("should emit error events on failures", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (): Promise => { + throw new Error("Resolver failed"); + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const errorEvent = events.find((e) => e.type === "error"); + expect(errorEvent).toBeDefined(); + + if (errorEvent?.type === "error") { + expect(errorEvent.error.message).toBe("Resolver failed"); + expect(errorEvent.error.scope).toBe("route"); + } + }); +}); + +describe("Graph construction", 
() => { + it("should build graph with source nodes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + const sourceNodes = result.graph.nodes.filter((n) => n.type === "source"); + expect(sourceNodes).toHaveLength(1); + expect(sourceNodes[0]?.type === "source" && sourceNodes[0].version).toBe("16.0.0"); + }); + + it("should build graph with file nodes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + const fileNodes = result.graph.nodes.filter((n) => n.type === "file"); + expect(fileNodes).toHaveLength(1); + expect(fileNodes[0]?.type === "file" && fileNodes[0].file.name).toBe("test.txt"); + }); + + it("should build graph with route nodes", async () => { + const route = definePipelineRoute({ + id: "line-break", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + const routeNodes = result.graph.nodes.filter((n) => n.type === "route"); + expect(routeNodes).toHaveLength(1); + expect(routeNodes[0]?.type === "route" && routeNodes[0].routeId).toBe("line-break"); + }); + + it("should build graph with output nodes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + const outputNodes = result.graph.nodes.filter((n) => n.type === "output"); + expect(outputNodes).toHaveLength(1); + }); + + it("should create edges between nodes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ 
version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.graph.edges.length).toBeGreaterThan(0); + + const providesEdges = result.graph.edges.filter((e) => e.type === "provides"); + const matchedEdges = result.graph.edges.filter((e) => e.type === "matched"); + const resolvedEdges = result.graph.edges.filter((e) => e.type === "resolved"); + + expect(providesEdges.length).toBeGreaterThan(0); + expect(matchedEdges.length).toBeGreaterThan(0); + expect(resolvedEdges.length).toBeGreaterThan(0); + }); +}); diff --git a/packages/pipelines/test/filters.test.ts b/packages/pipelines/test/filters.test.ts new file mode 100644 index 000000000..0fc2f0135 --- /dev/null +++ b/packages/pipelines/test/filters.test.ts @@ -0,0 +1,382 @@ +import type { FileContext } from "../src/types"; +import { describe, expect, it } from "vitest"; +import { + always, + and, + byDir, + byExt, + byGlob, + byName, + byPath, + byProp, + never, + not, + or, +} from "../src/filters"; + +function createFile(path: string, version = "16.0.0"): FileContext { + return { + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "", + version, + }; +} + +describe("byName", () => { + it("should match exact file name", () => { + const filter = byName("LineBreak.txt"); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(true); + }); + + it("should not match different file names", () => { + const filter = byName("LineBreak.txt"); + + expect(filter({ file: createFile("WordBreak.txt") })).toBe(false); + expect(filter({ file: createFile("linebreak.txt") })).toBe(false); + expect(filter({ file: createFile("LineBreak.html") })).toBe(false); + }); + + it("should match file name regardless of directory", () => { + const filter = byName("UnicodeData.txt"); + + expect(filter({ file: createFile("UnicodeData.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/UnicodeData.txt") })).toBe(true); + expect(filter({ file: createFile("deep/nested/path/UnicodeData.txt") })).toBe(true); + }); +}); + +describe("byDir", () => { + it("should match files in specific directory", () => { + const filter = byDir("ucd"); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/UnicodeData.txt") })).toBe(true); + }); + + it("should not match files in different directories", () => { + const filter = byDir("ucd"); + + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("extracted/DerivedName.txt") })).toBe(false); + expect(filter({ file: createFile("LineBreak.txt") })).toBe(false); + }); + + it("should match root directory with empty string", () => { + const filter = byDir(""); + + expect(filter({ file: createFile("ReadMe.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(false); + }); + + it("should match nested directories", () => { + const filter = byDir("ucd/auxiliary"); + + expect(filter({ file: createFile("ucd/auxiliary/WordBreakTest.txt") })).toBe(true); + expect(filter({ file: 
createFile("ucd/LineBreak.txt") })).toBe(false); + }); +}); + +describe("byExt", () => { + it("should match files by extension", () => { + const filter = byExt(".txt"); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/UnicodeData.txt") })).toBe(true); + }); + + it("should not match different extensions", () => { + const filter = byExt(".txt"); + + expect(filter({ file: createFile("ReadMe.html") })).toBe(false); + expect(filter({ file: createFile("emoji-data.json") })).toBe(false); + expect(filter({ file: createFile("config.xml") })).toBe(false); + }); + + it("should handle extension with or without dot", () => { + const filterWithDot = byExt(".json"); + const filterWithoutDot = byExt("json"); + + expect(filterWithDot({ file: createFile("data.json") })).toBe(true); + expect(filterWithoutDot({ file: createFile("data.json") })).toBe(true); + }); + + it("should match files without extension", () => { + const filter = byExt(""); + + expect(filter({ file: createFile("Makefile") })).toBe(true); + expect(filter({ file: createFile("LICENSE") })).toBe(true); + }); +}); + +describe("byPath", () => { + it("should match exact path", () => { + const filter = byPath("ucd/LineBreak.txt"); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + }); + + it("should not match different paths", () => { + const filter = byPath("ucd/LineBreak.txt"); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("ucd/WordBreak.txt") })).toBe(false); + }); + + it("should be case-sensitive", () => { + const filter = byPath("ucd/LineBreak.txt"); + + expect(filter({ file: createFile("UCD/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("ucd/linebreak.txt") })).toBe(false); + }); +}); + +describe("byGlob", () => { + it("should match files with glob pattern", () => { + const filter = byGlob("**/*.txt"); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/UnicodeData.txt") })).toBe(true); + expect(filter({ file: createFile("deep/nested/file.txt") })).toBe(true); + }); + + it("should not match non-matching files", () => { + const filter = byGlob("**/*.txt"); + + expect(filter({ file: createFile("ReadMe.html") })).toBe(false); + expect(filter({ file: createFile("data.json") })).toBe(false); + }); + + it("should support directory-specific patterns", () => { + const filter = byGlob("ucd/*.txt"); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("LineBreak.txt") })).toBe(false); + }); + + it("should support complex glob patterns", () => { + const filter = byGlob("**/auxiliary/*Test*.txt"); + + expect(filter({ file: createFile("auxiliary/WordBreakTest.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/auxiliary/LineBreakTest.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + }); + + it("should support negation patterns", () => { + const filter = byGlob("!**/*.html"); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ReadMe.html") })).toBe(false); + }); + + it("should support brace expansion", () => { + const filter = byGlob("**/*.{txt,json}"); + + expect(filter({ file: createFile("data.txt") 
})).toBe(true); + expect(filter({ file: createFile("config.json") })).toBe(true); + expect(filter({ file: createFile("readme.html") })).toBe(false); + }); +}); + +describe("byProp", () => { + it("should match rows with specific property", () => { + const filter = byProp("Line_Break"); + + expect(filter({ + file: createFile("test.txt"), + row: { property: "Line_Break" }, + })).toBe(true); + }); + + it("should not match rows with different property", () => { + const filter = byProp("Line_Break"); + + expect(filter({ + file: createFile("test.txt"), + row: { property: "Word_Break" }, + })).toBe(false); + }); + + it("should not match when row is undefined", () => { + const filter = byProp("Line_Break"); + + expect(filter({ file: createFile("test.txt") })).toBe(false); + }); + + it("should not match when row property is undefined", () => { + const filter = byProp("Line_Break"); + + expect(filter({ + file: createFile("test.txt"), + row: {}, + })).toBe(false); + }); +}); + +describe("and", () => { + it("should return true when all filters match", () => { + const filter = and(byExt(".txt"), byDir("ucd")); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + }); + + it("should return false when any filter does not match", () => { + const filter = and(byExt(".txt"), byDir("ucd")); + + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("ucd/data.json") })).toBe(false); + }); + + it("should short-circuit evaluation", () => { + let secondCalled = false; + const filter = and( + () => false, + () => { secondCalled = true; return true; }, + ); + + filter({ file: createFile("test.txt") }); + + expect(secondCalled).toBe(false); + }); + + it("should handle multiple filters", () => { + const filter = and( + byExt(".txt"), + byDir("ucd"), + byName("LineBreak.txt"), + ); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/WordBreak.txt") })).toBe(false); + }); + + it("should return true for empty filter list", () => { + const filter = and(); + + expect(filter({ file: createFile("any.txt") })).toBe(true); + }); +}); + +describe("or", () => { + it("should return true when any filter matches", () => { + const filter = or(byName("LineBreak.txt"), byName("WordBreak.txt")); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("WordBreak.txt") })).toBe(true); + }); + + it("should return false when no filter matches", () => { + const filter = or(byName("LineBreak.txt"), byName("WordBreak.txt")); + + expect(filter({ file: createFile("GraphemeBreak.txt") })).toBe(false); + }); + + it("should short-circuit evaluation", () => { + let secondCalled = false; + const filter = or( + () => true, + () => { secondCalled = true; return false; }, + ); + + filter({ file: createFile("test.txt") }); + + expect(secondCalled).toBe(false); + }); + + it("should handle multiple filters", () => { + const filter = or( + byName("LineBreak.txt"), + byName("WordBreak.txt"), + byName("GraphemeBreak.txt"), + ); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("WordBreak.txt") })).toBe(true); + expect(filter({ file: createFile("GraphemeBreak.txt") })).toBe(true); + expect(filter({ file: createFile("SentenceBreak.txt") })).toBe(false); + }); + + it("should return false for empty filter list", () => { + const filter = or(); + + expect(filter({ file: createFile("any.txt") })).toBe(false); + }); +}); + 
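// Editor's sketch, not part of this diff: the combinator tests above pin
// down exact semantics — an empty `and()` is vacuously true, an empty
// `or()` is vacuously false, and both short-circuit. Assuming only that
// `PipelineFilter` is `(ctx: FilterContext) => boolean` (as the type tests
// assert; the actual src/filters.ts is not shown here), combinators
// satisfying these tests could be written as:

import type { FilterContext, PipelineFilter } from "../src/types";

export function and(...filters: PipelineFilter[]): PipelineFilter {
  // Array.prototype.every returns true for [] and stops at the first false,
  // which is what the "should short-circuit evaluation" test relies on.
  return (ctx: FilterContext) => filters.every((filter) => filter(ctx));
}

export function or(...filters: PipelineFilter[]): PipelineFilter {
  // Array.prototype.some returns false for [] and stops at the first true.
  return (ctx: FilterContext) => filters.some((filter) => filter(ctx));
}

export function not(filter: PipelineFilter): PipelineFilter {
  return (ctx: FilterContext) => !filter(ctx);
}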
+describe("not", () => { + it("should negate the filter result", () => { + const filter = not(byName("LineBreak.txt")); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("WordBreak.txt") })).toBe(true); + }); + + it("should work with complex filters", () => { + const filter = not(and(byExt(".txt"), byDir("ucd"))); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/data.json") })).toBe(true); + }); +}); + +describe("always", () => { + it("should always return true", () => { + const filter = always(); + + expect(filter({ file: createFile("any.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/data.json") })).toBe(true); + expect(filter({ file: createFile("deep/nested/path.xml") })).toBe(true); + }); +}); + +describe("never", () => { + it("should always return false", () => { + const filter = never(); + + expect(filter({ file: createFile("any.txt") })).toBe(false); + expect(filter({ file: createFile("ucd/data.json") })).toBe(false); + expect(filter({ file: createFile("deep/nested/path.xml") })).toBe(false); + }); +}); + +describe("filter composition", () => { + it("should support complex compositions", () => { + const filter = or( + and(byDir("ucd"), byExt(".txt")), + and(byDir("auxiliary"), byGlob("**/*Test*.txt")), + ); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/WordBreakTest.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/data.txt") })).toBe(false); + expect(filter({ file: createFile("other/file.txt") })).toBe(false); + }); + + it("should support exclusion patterns", () => { + const filter = and( + byExt(".txt"), + not(byGlob("**/*Test*.txt")), + ); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("TestLineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("ucd/WordBreakTest.txt") })).toBe(false); + }); + + it("should support version-aware filtering", () => { + const filter = and( + byName("UnicodeData.txt"), + (ctx) => ctx.file.version === "16.0.0", + ); + + expect(filter({ file: createFile("UnicodeData.txt", "16.0.0") })).toBe(true); + expect(filter({ file: createFile("UnicodeData.txt", "15.1.0") })).toBe(false); + }); +}); diff --git a/packages/pipelines/test/pipeline.test.ts b/packages/pipelines/test/pipeline.test.ts new file mode 100644 index 000000000..ae39fe108 --- /dev/null +++ b/packages/pipelines/test/pipeline.test.ts @@ -0,0 +1,553 @@ +import type { FileContext, ParseContext, ParsedRow, PropertyJson, ResolveContext } from "../src/types"; +import { describe, expect, expectTypeOf, it } from "vitest"; +import { definePipelineArtifact } from "../src/artifact"; +import { byDir, byExt, byGlob, byName } from "../src/filters"; +import { definePipeline } from "../src/pipeline"; +import { definePipelineRoute } from "../src/route"; + +function createMockSource(files: Record>) { + return { + listFiles: async (version: string): Promise => { + const versionFiles = files[version] ?? {}; + return Object.keys(versionFiles).map((path) => ({ + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? 
path.substring(path.lastIndexOf(".")) : "", + version, + })); + }, + readFile: async (file: FileContext): Promise => { + const versionFiles = files[file.version] ?? {}; + return versionFiles[file.path] ?? ""; + }, + }; +} + +function createRow(ctx: ParseContext, props: Partial & { codePoint?: string; property?: string }): ParsedRow { + return { + sourceFile: ctx.file.path, + kind: props.codePoint ? "point" : "range", + ...props, + }; +} + +describe("definePipeline", () => { + it("should create a pipeline with routes", () => { + const route = definePipelineRoute({ + id: "test-route", + filter: byName("test.txt"), + parser: async function* (ctx) { + const content = await ctx.readContent(); + yield createRow(ctx, { codePoint: "0000", property: "test", value: content }); + }, + resolver: async (ctx, rows): Promise => { + const entries = []; + for await (const row of rows) { + entries.push({ codePoint: row.codePoint, value: row.value as string }); + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({}), + routes: [route], + }); + + expect(pipeline).toBeDefined(); + expect(typeof pipeline.run).toBe("function"); + }); + + it("should run a simple pipeline and return results", async () => { + const route = definePipelineRoute({ + id: "simple", + filter: byName("data.txt"), + parser: async function* (ctx) { + const content = await ctx.readContent(); + for (const line of content.split("\n")) { + if (line.trim()) { + const [codePoint, prop] = line.split(";").map((s) => s.trim()); + yield createRow(ctx, { codePoint, property: prop ?? "", value: prop ?? "" }); + } + } + }, + resolver: async (ctx, rows): Promise => { + const entries = []; + for await (const row of rows) { + entries.push({ codePoint: row.codePoint, value: row.property ?? 
"" }); + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries }]; + }, + }); + + const source = createMockSource({ + "16.0.0": { + "data.txt": "0041;Letter\n0042;Letter", + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source, + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + expect(result.data).toHaveLength(1); + expect(result.data[0]).toMatchObject({ + version: "16.0.0", + property: "Test", + }); + expect(result.summary.totalFiles).toBe(1); + expect(result.summary.matchedFiles).toBe(1); + expect(result.summary.skippedFiles).toBe(0); + }); + + it("should emit events during pipeline execution", async () => { + const events: string[] = []; + + const route = definePipelineRoute({ + id: "event-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { "test.txt": "content" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source, + routes: [route], + onEvent: (event) => { + events.push(event.type); + }, + }); + + await pipeline.run(); + + expect(events).toContain("pipeline:start"); + expect(events).toContain("pipeline:end"); + expect(events).toContain("version:start"); + expect(events).toContain("version:end"); + expect(events).toContain("file:matched"); + expect(events).toContain("parse:start"); + expect(events).toContain("parse:end"); + expect(events).toContain("resolve:start"); + expect(events).toContain("resolve:end"); + }); + + it("should build a pipeline graph", async () => { + const route = definePipelineRoute({ + id: "graph-test", + filter: byName("file.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { "file.txt": "content" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source, + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.graph.nodes.length).toBeGreaterThan(0); + expect(result.graph.edges.length).toBeGreaterThan(0); + + const nodeTypes = result.graph.nodes.map((n) => n.type); + expect(nodeTypes).toContain("source"); + expect(nodeTypes).toContain("file"); + expect(nodeTypes).toContain("route"); + expect(nodeTypes).toContain("output"); + + const edgeTypes = result.graph.edges.map((e) => e.type); + expect(edgeTypes).toContain("provides"); + expect(edgeTypes).toContain("matched"); + expect(edgeTypes).toContain("resolved"); + }); + + it("should support artifacts", async () => { + const aliasArtifact = definePipelineArtifact({ + id: "aliases", + build: async () => { + return new Map([["A", ["Letter_A", "Uppercase_A"]]]); + }, + }); + + const route = definePipelineRoute({ + id: "with-artifact", + filter: byName("data.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0041", property: "A", value: "A" }); + }, + resolver: async (ctx, rows): Promise => { + const aliases = ctx.getArtifact("aliases") as Map | undefined; + const entries = []; + for await (const row of rows) { + const propertyAliases = aliases?.get(row.property ?? "") ?? 
[]; + entries.push({ + codePoint: row.codePoint, + value: propertyAliases.join(",") || (row.property ?? ""), + }); + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries }]; + }, + }); + + const source = createMockSource({ + "16.0.0": { "data.txt": "0041;A" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source, + artifacts: [aliasArtifact], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + expect(result.data).toHaveLength(1); + }); + + it("should use fallback for unmatched files", async () => { + const route = definePipelineRoute({ + id: "specific", + filter: byName("known.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "known", value: "known" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Known", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { + "known.txt": "content", + "unknown.txt": "other content", + }, + }); + + const fallbackRoute = { + parser: async function* (ctx: ParseContext) { + yield createRow(ctx, { codePoint: "FFFF", property: "fallback", value: await ctx.readContent() }); + }, + resolver: async (ctx: ResolveContext): Promise => [ + { version: ctx.version, property: "Fallback", file: ctx.file.name, entries: [] }, + ], + }; + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source, + routes: [route], + fallback: fallbackRoute, + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + expect(result.data).toHaveLength(2); + expect(result.summary.matchedFiles).toBe(1); + expect(result.summary.fallbackFiles).toBe(1); + }); + + it("should skip files with no match in non-strict mode", async () => { + const route = definePipelineRoute({ + id: "strict-test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Matched", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { + "matched.txt": "content", + "unmatched.txt": "other", + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source, + routes: [route], + strict: false, + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + expect(result.data).toHaveLength(1); + expect(result.summary.skippedFiles).toBe(1); + }); + + it("should report error for unmatched files in strict mode", async () => { + const route = definePipelineRoute({ + id: "strict-test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Matched", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { + "matched.txt": "content", + "unmatched.txt": "other", + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source, + routes: [route], + strict: true, + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]!.scope).toBe("file"); + expect(result.errors[0]!.message).toContain("No matching route"); + }); + + it("should handle route errors gracefully", async () => { + const route = definePipelineRoute({ + id: 
"error-route", + filter: byName("error.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (): Promise => { + throw new Error("Resolver failed"); + }, + }); + + const source = createMockSource({ + "16.0.0": { "error.txt": "content" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source, + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]!.scope).toBe("route"); + expect(result.errors[0]!.message).toBe("Resolver failed"); + expect(result.errors[0]!.routeId).toBe("error-route"); + }); + + it("should process multiple versions", async () => { + const route = definePipelineRoute({ + id: "multi-version", + filter: byName("data.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { "data.txt": "content" }, + "15.1.0": { "data.txt": "content" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + source, + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(2); + const versions = result.data.map((d) => (d as PropertyJson).version); + expect(versions).toContain("16.0.0"); + expect(versions).toContain("15.1.0"); + expect(result.summary.versions).toEqual(["16.0.0", "15.1.0"]); + }); + + it("should apply include filter to limit processed files", async () => { + const route = definePipelineRoute({ + id: "include-test", + filter: byExt(".txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { + "include.txt": "content", + "exclude.txt": "content", + "data/nested.txt": "content", + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source, + routes: [route], + include: byDir(""), + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(2); + const files = result.data.map((d) => (d as PropertyJson).file); + expect(files).toContain("include.txt"); + expect(files).toContain("exclude.txt"); + expect(files).not.toContain("nested.txt"); + }); +}); + +describe("filters", () => { + const createFile = (path: string, version = "16.0.0"): FileContext => ({ + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? 
path.substring(path.lastIndexOf(".")) : "", + version, + }); + + describe("byName", () => { + it("should match exact file name", () => { + const filter = byName("test.txt"); + expect(filter({ file: createFile("test.txt") })).toBe(true); + expect(filter({ file: createFile("other.txt") })).toBe(false); + expect(filter({ file: createFile("dir/test.txt") })).toBe(true); + }); + }); + + describe("byDir", () => { + it("should match files in directory", () => { + const filter = byDir("ucd"); + expect(filter({ file: createFile("ucd/test.txt") })).toBe(true); + expect(filter({ file: createFile("other/test.txt") })).toBe(false); + expect(filter({ file: createFile("test.txt") })).toBe(false); + }); + }); + + describe("byExt", () => { + it("should match files by extension", () => { + const filter = byExt(".txt"); + expect(filter({ file: createFile("test.txt") })).toBe(true); + expect(filter({ file: createFile("test.html") })).toBe(false); + }); + }); + + describe("byGlob", () => { + it("should match files by glob pattern", () => { + const filter = byGlob("**/*.txt"); + expect(filter({ file: createFile("test.txt") })).toBe(true); + expect(filter({ file: createFile("dir/test.txt") })).toBe(true); + expect(filter({ file: createFile("test.html") })).toBe(false); + }); + + it("should support complex glob patterns", () => { + const filter = byGlob("auxiliary/*.txt"); + expect(filter({ file: createFile("auxiliary/WordBreakTest.txt") })).toBe(true); + expect(filter({ file: createFile("UnicodeData.txt") })).toBe(false); + }); + }); +}); + +describe("type inference", () => { + it("should infer route output types", () => { + const route = definePipelineRoute({ + id: "typed-route", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [{ codePoint: "0000", value: "X" }] }, + ], + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({}), + routes: [route] as const, + }); + + const _typeTest = async () => { + const result = await pipeline.run(); + expectTypeOf(result.data).toBeArray(); + }; + }); + + it("should have properly typed resolver context", () => { + const aliasArtifact = definePipelineArtifact({ + id: "aliases", + build: async () => new Map(), + }); + + const countArtifact = definePipelineArtifact({ + id: "count", + build: async () => 42, + }); + + const route = definePipelineRoute({ + id: "typed-artifacts", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx, _rows): Promise => { + const aliases = ctx.getArtifact("aliases"); + const count = ctx.getArtifact("count"); + + expect(aliases).toBeUndefined(); + expect(count).toBeUndefined(); + + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const _pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({}), + artifacts: [aliasArtifact, countArtifact], + routes: [route], + }); + }); +}); diff --git a/packages/pipelines/test/results.test.ts b/packages/pipelines/test/results.test.ts new file mode 100644 index 000000000..b67d161a6 --- /dev/null +++ b/packages/pipelines/test/results.test.ts @@ -0,0 +1,456 @@ +import type { FileContext, ParseContext, ParsedRow, PropertyJson } from "../src/types"; +import type { PipelineGraph } 
from "../src/events"; +import type { PipelineRunResult, PipelineSummary } from "../src/results"; +import { describe, expect, expectTypeOf, it } from "vitest"; +import { byName } from "../src/filters"; +import { definePipeline } from "../src/pipeline"; +import { definePipelineRoute } from "../src/route"; + +function createMockSource(files: Record>) { + return { + listFiles: async (version: string): Promise => { + const versionFiles = files[version] ?? {}; + return Object.keys(versionFiles).map((path) => ({ + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "", + version, + })); + }, + readFile: async (file: FileContext): Promise => { + const versionFiles = files[file.version] ?? {}; + return versionFiles[file.path] ?? ""; + }, + }; +} + +function createRow(ctx: ParseContext, props: Partial): ParsedRow { + return { + sourceFile: ctx.file.path, + kind: props.codePoint ? "point" : "range", + ...props, + }; +} + +describe("PipelineSummary", () => { + it("should have correct structure", () => { + const summary: PipelineSummary = { + versions: ["16.0.0", "15.1.0"], + totalFiles: 100, + matchedFiles: 80, + skippedFiles: 15, + fallbackFiles: 5, + totalOutputs: 120, + durationMs: 500, + }; + + expectTypeOf(summary.versions).toEqualTypeOf(); + expectTypeOf(summary.totalFiles).toEqualTypeOf(); + expectTypeOf(summary.matchedFiles).toEqualTypeOf(); + expectTypeOf(summary.skippedFiles).toEqualTypeOf(); + expectTypeOf(summary.fallbackFiles).toEqualTypeOf(); + expectTypeOf(summary.totalOutputs).toEqualTypeOf(); + expectTypeOf(summary.durationMs).toEqualTypeOf(); + }); + + it("should contain all processed versions", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0", "15.0.0"], + source: createMockSource({ + "16.0.0": { "test.txt": "content" }, + "15.1.0": { "test.txt": "content" }, + "15.0.0": { "test.txt": "content" }, + }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.versions).toEqual(["16.0.0", "15.1.0", "15.0.0"]); + }); + + it("should track total files across all versions", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + source: createMockSource({ + "16.0.0": { "test.txt": "a", "other.txt": "b" }, + "15.1.0": { "test.txt": "c", "another.txt": "d", "third.txt": "e" }, + }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.totalFiles).toBe(5); + }); + + it("should track matched files", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async 
(ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ + "16.0.0": { + "matched.txt": "a", + "unmatched1.txt": "b", + "unmatched2.txt": "c", + }, + }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.matchedFiles).toBe(1); + }); + + it("should track skipped files", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ + "16.0.0": { + "matched.txt": "a", + "unmatched1.txt": "b", + "unmatched2.txt": "c", + }, + }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.skippedFiles).toBe(2); + }); + + it("should track total outputs", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0041", value: "A" }); + yield createRow(ctx, { codePoint: "0042", value: "B" }); + }, + resolver: async (ctx, rows): Promise => { + const outputs: PropertyJson[] = []; + for await (const row of rows) { + outputs.push({ + version: ctx.version, + property: row.value as string, + file: ctx.file.name, + entries: [], + }); + } + return outputs; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.totalOutputs).toBe(2); + }); + + it("should track duration", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.durationMs).toBeGreaterThanOrEqual(0); + }); +}); + +describe("PipelineRunResult", () => { + it("should have correct structure", () => { + type TestData = { id: string }; + const result: PipelineRunResult = { + data: [{ id: "1" }, { id: "2" }], + graph: { nodes: [], edges: [] }, + errors: [], + summary: { + versions: ["16.0.0"], + totalFiles: 10, + matchedFiles: 8, + skippedFiles: 2, + fallbackFiles: 0, + totalOutputs: 8, + durationMs: 100, + }, + }; + + expectTypeOf(result.data).toEqualTypeOf(); + expectTypeOf(result.graph).toEqualTypeOf(); + expectTypeOf(result.errors).toBeArray(); + expectTypeOf(result.summary).toEqualTypeOf(); + }); + + it("should contain all outputs in data array", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0041", value: "A" }); + yield createRow(ctx, { codePoint: 
"0042", value: "B" }); + }, + resolver: async (ctx, rows): Promise => { + const outputs: PropertyJson[] = []; + for await (const row of rows) { + outputs.push({ + version: ctx.version, + property: row.value as string, + file: ctx.file.name, + entries: [{ codePoint: row.codePoint, value: row.value as string }], + }); + } + return outputs; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(2); + expect(result.data[0]?.property).toBe("A"); + expect(result.data[1]?.property).toBe("B"); + }); + + it("should contain graph structure", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.graph).toBeDefined(); + expect(result.graph.nodes).toBeInstanceOf(Array); + expect(result.graph.edges).toBeInstanceOf(Array); + expect(result.graph.nodes.length).toBeGreaterThan(0); + expect(result.graph.edges.length).toBeGreaterThan(0); + }); + + it("should contain errors when they occur", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (): Promise => { + throw new Error("Test error"); + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]?.message).toBe("Test error"); + expect(result.errors[0]?.scope).toBe("route"); + }); + + it("should have empty errors array when no errors occur", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + }); + + it("should accumulate multiple errors", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (): Promise => { + throw new Error("Route error"); + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + source: createMockSource({ + "16.0.0": { "test.txt": "content" }, + "15.1.0": { "test.txt": "content" }, + }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors.length).toBe(2); + }); +}); + 
+describe("Result data typing", () => { + it("should infer output type from routes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expectTypeOf(result.data).toEqualTypeOf(); + }); + + it("should combine multiple route output types when using same base type", async () => { + const route1 = definePipelineRoute({ + id: "route1", + filter: byName("a.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "A", file: ctx.file.name, entries: [] }]; + }, + }); + + const route2 = definePipelineRoute({ + id: "route2", + filter: byName("b.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "B", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ + "16.0.0": { "a.txt": "content", "b.txt": "content" }, + }), + routes: [route1, route2], + }); + + const result = await pipeline.run(); + + expectTypeOf(result.data).toEqualTypeOf(); + expect(result.data).toHaveLength(2); + }); +}); diff --git a/packages/pipelines/test/route.test.ts b/packages/pipelines/test/route.test.ts new file mode 100644 index 000000000..544be882e --- /dev/null +++ b/packages/pipelines/test/route.test.ts @@ -0,0 +1,439 @@ +import type { FileContext, ParseContext, ParsedRow, PropertyJson } from "../src/types"; +import { describe, expect, expectTypeOf, it } from "vitest"; +import { byName } from "../src/filters"; +import { definePipeline } from "../src/pipeline"; +import { + definePipelineRoute, + type InferRouteId, + type InferRouteOutput, + type InferRoutesOutput, + type PipelineRouteDefinition, +} from "../src/route"; + +function createMockSource(files: Record>) { + return { + listFiles: async (version: string): Promise => { + const versionFiles = files[version] ?? {}; + return Object.keys(versionFiles).map((path) => ({ + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "", + version, + })); + }, + readFile: async (file: FileContext): Promise => { + const versionFiles = files[file.version] ?? {}; + return versionFiles[file.path] ?? ""; + }, + }; +} + +function createRow(ctx: ParseContext, props: Partial): ParsedRow { + return { + sourceFile: ctx.file.path, + kind: props.codePoint ? 
"point" : "range", + ...props, + }; +} + +describe("definePipelineRoute", () => { + it("should create a route definition with required fields", () => { + const route = definePipelineRoute({ + id: "test-route", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + expect(route.id).toBe("test-route"); + expect(typeof route.filter).toBe("function"); + expect(typeof route.parser).toBe("function"); + expect(typeof route.resolver).toBe("function"); + }); + + it("should preserve the route id as a literal type", () => { + const route = definePipelineRoute({ + id: "line-break-route", + filter: byName("LineBreak.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Line_Break", file: ctx.file.name, entries: [] }, + ], + }); + + expectTypeOf(route.id).toEqualTypeOf<"line-break-route">(); + }); + + it("should support optional out configuration", () => { + const route = definePipelineRoute({ + id: "with-output-config", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + out: { + dir: "custom-output", + fileName: (pj) => `${pj.property.toLowerCase()}.json`, + }, + }); + + expect(route.out).toBeDefined(); + expect(route.out?.dir).toBe("custom-output"); + expect(typeof route.out?.fileName).toBe("function"); + }); + + it("should parse file content correctly", async () => { + const parsedRows: ParsedRow[] = []; + + const route = definePipelineRoute({ + id: "parsing-test", + filter: byName("data.txt"), + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (!ctx.isComment(line) && line.trim()) { + const [codePoint, value] = line.split(";").map((s) => s.trim()); + yield createRow(ctx, { codePoint, value }); + } + } + }, + resolver: async (ctx, rows): Promise => { + for await (const row of rows) { + parsedRows.push(row); + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ + "16.0.0": { "data.txt": "# comment\n0041;A\n0042;B\n\n0043;C" }, + }), + routes: [route], + }); + + await pipeline.run(); + + expect(parsedRows).toHaveLength(3); + expect(parsedRows[0]!.codePoint).toBe("0041"); + expect(parsedRows[1]!.codePoint).toBe("0042"); + expect(parsedRows[2]!.codePoint).toBe("0043"); + }); + + it("should receive file context in parser", async () => { + let receivedFile: FileContext | undefined; + + const route = definePipelineRoute({ + id: "file-context-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + receivedFile = ctx.file; + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + // Must consume rows to trigger parser execution + for await (const _row of rows) { + // consume + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + await 
pipeline.run(); + + expect(receivedFile).toBeDefined(); + expect(receivedFile?.name).toBe("test.txt"); + expect(receivedFile?.version).toBe("16.0.0"); + }); + + it("should receive version and file in resolver context", async () => { + let receivedVersion: string | undefined; + let receivedFileName: string | undefined; + + const route = definePipelineRoute({ + id: "resolver-context-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => { + receivedVersion = ctx.version; + receivedFileName = ctx.file.name; + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + await pipeline.run(); + + expect(receivedVersion).toBe("16.0.0"); + expect(receivedFileName).toBe("test.txt"); + }); + + it("should provide normalizeEntries utility in resolver", async () => { + let hasNormalizeEntries = false; + + const route = definePipelineRoute({ + id: "normalize-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => { + hasNormalizeEntries = typeof ctx.normalizeEntries === "function"; + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + await pipeline.run(); + + expect(hasNormalizeEntries).toBe(true); + }); + + it("should provide now utility for timestamps", async () => { + let timestamp: string | undefined; + + const route = definePipelineRoute({ + id: "timestamp-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => { + timestamp = ctx.now(); + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + await pipeline.run(); + + expect(timestamp).toBeDefined(); + expect(new Date(timestamp!).toISOString()).toBe(timestamp); + }); + + it("should match files based on filter", async () => { + const matchedFiles: string[] = []; + + const route = definePipelineRoute({ + id: "filter-test", + filter: byName("target.txt"), + parser: async function* (ctx) { + matchedFiles.push(ctx.file.name); + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) { + // consume to trigger parser + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ + "16.0.0": { + "target.txt": "content", + "other.txt": "content", + "another.txt": "content", + }, + }), + routes: [route], + }); + + await pipeline.run(); + + expect(matchedFiles).toEqual(["target.txt"]); + }); + + it("should support returning single output instead of array", async () => { + const route = definePipelineRoute({ + id: "single-output", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield 
createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(1); + }); + + it("should support returning multiple outputs from resolver", async () => { + const route = definePipelineRoute({ + id: "multi-output", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0041", property: "A", value: "A" }); + yield createRow(ctx, { codePoint: "0042", property: "B", value: "B" }); + }, + resolver: async (ctx, rows): Promise => { + const outputs: PropertyJson[] = []; + for await (const row of rows) { + outputs.push({ + version: ctx.version, + property: row.property ?? "Unknown", + file: ctx.file.name, + entries: [{ codePoint: row.codePoint, value: row.value as string }], + }); + } + return outputs; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(2); + }); + + it("should handle parser errors", async () => { + const route = definePipelineRoute({ + id: "parser-error", + filter: byName("test.txt"), + parser: async function* () { + throw new Error("Parser failed"); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) { + // consume to trigger parser error + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]!.message).toBe("Parser failed"); + }); + + it("should handle resolver errors", async () => { + const route = definePipelineRoute({ + id: "resolver-error", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (): Promise => { + throw new Error("Resolver failed"); + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + source: createMockSource({ "16.0.0": { "test.txt": "content" } }), + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]!.message).toBe("Resolver failed"); + expect(result.errors[0]!.routeId).toBe("resolver-error"); + }); +}); + +describe("route type inference", () => { + it("should infer route id type", () => { + const route = definePipelineRoute({ + id: "inferred-id", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + type Id = InferRouteId; + expectTypeOf().toEqualTypeOf<"inferred-id">(); + }); + + it("should infer route output type", () => { + const route = definePipelineRoute({ + id: "typed-output", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise 
+
+  it("should infer route output type", () => {
+    const route = definePipelineRoute({
+      id: "typed-output",
+      filter: byName("test.txt"),
+      parser: async function* (ctx) {
+        yield createRow(ctx, { codePoint: "0000", value: "x" });
+      },
+      resolver: async (ctx): Promise<PropertyJson[]> => [
+        { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] },
+      ],
+    });
+
+    type Output = InferRouteOutput<typeof route>;
+    expectTypeOf<Output>().toEqualTypeOf<PropertyJson[]>();
+  });
+
+  it("should infer combined output from multiple routes", () => {
+    type Routes = readonly [
+      PipelineRouteDefinition<"a", Record<string, never>, PropertyJson[]>,
+      PipelineRouteDefinition<"b", Record<string, never>, PropertyJson[]>,
+    ];
+
+    type Output = InferRoutesOutput<Routes>;
+    expectTypeOf<Output>().toEqualTypeOf<PropertyJson[]>();
+  });
+
+  it("should type PipelineRouteDefinition with generics", () => {
+    type CustomRoute = PipelineRouteDefinition<"custom-id", { cache: Map<string, string> }, PropertyJson[]>;
+
+    expectTypeOf<CustomRoute["id"]>().toEqualTypeOf<"custom-id">();
+  });
+});
diff --git a/packages/pipelines/test/types.test.ts b/packages/pipelines/test/types.test.ts
new file mode 100644
index 000000000..ba7b5758e
--- /dev/null
+++ b/packages/pipelines/test/types.test.ts
@@ -0,0 +1,458 @@
+import { describe, expectTypeOf, it } from "vitest";
+import type {
+  InferArtifactId,
+  InferArtifactsMap,
+  InferArtifactValue,
+  PipelineArtifactDefinition,
+} from "../src/artifact";
+import type {
+  PipelineEvent,
+  PipelineGraph,
+  PipelineGraphEdge,
+  PipelineGraphNode,
+  PipelineError,
+} from "../src/events";
+import type { PipelineRunResult, PipelineSummary } from "../src/results";
+import type {
+  InferRouteId,
+  InferRouteOutput,
+  InferRoutesOutput,
+  PipelineRouteDefinition,
+} from "../src/route";
+import type {
+  DefaultRange,
+  FileContext,
+  FilterContext,
+  ParseContext,
+  ParsedRow,
+  ParserFn,
+  PipelineFilter,
+  PipelineSource,
+  PropertyJson,
+  ResolvedEntry,
+  ResolveContext,
+  ResolverFn,
+  RowContext,
+} from "../src/types";
+
+describe("FileContext type", () => {
+  it("should have correct structure", () => {
+    expectTypeOf<FileContext>().toMatchTypeOf<{
+      version: string;
+      dir: string;
+      path: string;
+      name: string;
+      ext: string;
+    }>();
+  });
+
+  it("should allow specific dir values", () => {
+    const file: FileContext = {
+      version: "16.0.0",
+      dir: "ucd",
+      path: "ucd/LineBreak.txt",
+      name: "LineBreak.txt",
+      ext: ".txt",
+    };
+
+    expectTypeOf(file.dir).toMatchTypeOf<string>();
+  });
+});
+
+describe("RowContext type", () => {
+  it("should have optional property field", () => {
+    expectTypeOf<RowContext>().toMatchTypeOf<{ property?: string }>();
+  });
+});
+
+describe("FilterContext type", () => {
+  it("should have file and optional row", () => {
+    expectTypeOf<FilterContext>().toMatchTypeOf<{
+      file: FileContext;
+      row?: RowContext;
+    }>();
+  });
+});
+
+describe("PipelineFilter type", () => {
+  it("should be a predicate function", () => {
+    expectTypeOf<PipelineFilter>().toEqualTypeOf<(ctx: FilterContext) => boolean>();
+  });
+});
+
+describe("ParsedRow type", () => {
+  it("should have required sourceFile and kind", () => {
+    expectTypeOf<ParsedRow>().toMatchTypeOf<{
+      sourceFile: string;
+      kind: "range" | "point" | "sequence" | "alias";
+    }>();
+  });
+
+  it("should have optional range fields", () => {
+    expectTypeOf<ParsedRow>().toMatchTypeOf<{
+      start?: string;
+      end?: string;
+      codePoint?: string;
+      sequence?: string[];
+    }>();
+  });
+
+  it("should have optional property and value", () => {
+    expectTypeOf<ParsedRow>().toMatchTypeOf<{
+      property?: string;
+      value?: string | string[];
+      meta?: Record<string, unknown>;
+    }>();
+  });
+});
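+
+// Illustrative only (assumed pairings, not fixtures used by these tests):
+// the four `kind` variants typically carry fields as follows.
+//   { kind: "range", start: "0000", end: "001F", value: "CM", sourceFile: "LineBreak.txt" }
+//   { kind: "point", codePoint: "0041", value: "AL", sourceFile: "LineBreak.txt" }
+//   { kind: "sequence", sequence: ["0041", "0301"], sourceFile: "NamedSequences.txt" }
+//   { kind: "alias", property: "lb", value: "Line_Break", sourceFile: "PropertyAliases.txt" }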
+
+describe("ParseContext type", () => {
+  it("should have file and reader methods", () => {
+    expectTypeOf<ParseContext>().toMatchTypeOf<{
+      file: FileContext;
+      readContent: () => Promise<string>;
+      readLines: () => AsyncIterable<string>;
+      isComment: (line: string) => boolean;
+    }>();
+  });
+});
+
+describe("ParserFn type", () => {
+  it("should take ParseContext and return AsyncIterable of ParsedRow", () => {
+    expectTypeOf<ParserFn>().toEqualTypeOf<(ctx: ParseContext) => AsyncIterable<ParsedRow>>();
+  });
+});
+
+describe("ResolvedEntry type", () => {
+  it("should have value and optional location fields", () => {
+    expectTypeOf<ResolvedEntry>().toMatchTypeOf<{
+      value: string | string[];
+      range?: `${string}..${string}`;
+      codePoint?: string;
+      sequence?: string[];
+    }>();
+  });
+});
+
+describe("DefaultRange type", () => {
+  it("should have range and value", () => {
+    expectTypeOf<DefaultRange>().toMatchTypeOf<{
+      range: `${string}..${string}`;
+      value: string | string[];
+    }>();
+  });
+});
+
+describe("PropertyJson type", () => {
+  it("should have version, property, file, and entries", () => {
+    expectTypeOf<PropertyJson>().toMatchTypeOf<{
+      version: string;
+      property: string;
+      file: string;
+      entries: ResolvedEntry[];
+    }>();
+  });
+
+  it("should have optional defaults and meta", () => {
+    expectTypeOf<PropertyJson>().toMatchTypeOf<{
+      defaults?: DefaultRange[];
+      meta?: Record<string, unknown>;
+    }>();
+  });
+});
+
+describe("ResolveContext type", () => {
+  it("should have version and file", () => {
+    expectTypeOf<ResolveContext>().toMatchTypeOf<{
+      version: string;
+      file: FileContext;
+    }>();
+  });
+
+  it("should have getArtifact method", () => {
+    type Ctx = ResolveContext<{ aliases: Map<string, string>; count: number }>;
+
+    expectTypeOf<Ctx["getArtifact"]>().toBeFunction();
+  });
+
+  it("should have utility methods", () => {
+    expectTypeOf<ResolveContext>().toMatchTypeOf<{
+      normalizeEntries: (entries: ResolvedEntry[]) => ResolvedEntry[];
+      now: () => string;
+    }>();
+  });
+});
+
+describe("ResolverFn type", () => {
+  it("should take context and rows and return promise", () => {
+    type Fn = ResolverFn<Record<string, never>, PropertyJson[]>;
+
+    expectTypeOf<Fn>().toMatchTypeOf<
+      (ctx: ResolveContext<Record<string, never>>, rows: AsyncIterable<ParsedRow>) => Promise<PropertyJson[]>
+    >();
+  });
+
+  it("should support custom artifact types", () => {
+    type CustomArtifacts = { cache: Map<string, string> };
+    type Fn = ResolverFn<CustomArtifacts, PropertyJson[]>;
+
+    expectTypeOf<Fn>().toMatchTypeOf<
+      (ctx: ResolveContext<CustomArtifacts>, rows: AsyncIterable<ParsedRow>) => Promise<PropertyJson[]>
+    >();
+  });
+});
+
+describe("PipelineSource type", () => {
+  it("should have listFiles and readFile methods", () => {
+    expectTypeOf<PipelineSource>().toMatchTypeOf<{
+      listFiles: (version: string) => Promise<FileContext[]>;
+      readFile: (file: FileContext) => Promise<string>;
+    }>();
+  });
+});
+
+describe("PipelineArtifactDefinition type", () => {
+  it("should have id and build function", () => {
+    expectTypeOf<PipelineArtifactDefinition>().toMatchTypeOf<{
+      id: string;
+      build: (ctx: { version: string }, rows?: AsyncIterable<ParsedRow>) => Promise<unknown>;
+    }>();
+  });
+
+  it("should have optional filter and parser", () => {
+    expectTypeOf<PipelineArtifactDefinition>().toMatchTypeOf<{
+      filter?: PipelineFilter;
+      parser?: ParserFn;
+    }>();
+  });
+
+  it("should preserve generic id type", () => {
+    type Specific = PipelineArtifactDefinition<"my-id", number>;
+    expectTypeOf<Specific["id"]>().toEqualTypeOf<"my-id">();
+  });
+
+  it("should preserve generic value type", () => {
+    type Specific = PipelineArtifactDefinition<"id", Map<string, number>>;
+    expectTypeOf<Specific["build"]>().returns.resolves.toEqualTypeOf<Map<string, number>>();
+  });
+});
+
+describe("InferArtifactId type", () => {
+  it("should extract id from artifact definition", () => {
+    type Def = PipelineArtifactDefinition<"extracted-id", unknown>;
+    expectTypeOf<InferArtifactId<Def>>().toEqualTypeOf<"extracted-id">();
+  });
+});
+
+describe("InferArtifactValue type", () => {
+  it("should extract value type from artifact definition", () => {
+    type Def = PipelineArtifactDefinition<"id", Set<string>>;
+    expectTypeOf<InferArtifactValue<Def>>().toEqualTypeOf<Set<string>>();
+  });
+});
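+
+// Sketch of why the Infer* helpers matter (assumed wiring, not asserted here):
+// for a pipeline defined with artifacts `[aliases, counts]`, InferArtifactsMap
+// over that tuple is what would let `ctx.getArtifact("aliases")` in a resolver
+// be typed as the artifact's built value rather than `unknown`. The names
+// `aliases` and `counts` are illustrative only.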
PipelineArtifactDefinition<"b", number>, + PipelineArtifactDefinition<"c", boolean>, + ]; + + expectTypeOf>().toEqualTypeOf<{ + a: string; + b: number; + c: boolean; + }>(); + }); + + it("should handle complex value types", () => { + type Artifacts = [ + PipelineArtifactDefinition<"map", Map>, + PipelineArtifactDefinition<"set", Set>, + PipelineArtifactDefinition<"obj", { nested: { value: boolean } }>, + ]; + + expectTypeOf>().toEqualTypeOf<{ + map: Map; + set: Set; + obj: { nested: { value: boolean } }; + }>(); + }); +}); + +describe("PipelineRouteDefinition type", () => { + it("should have id, filter, parser, and resolver", () => { + expectTypeOf().toMatchTypeOf<{ + id: string; + filter: PipelineFilter; + parser: ParserFn; + resolver: (ctx: ResolveContext, rows: AsyncIterable) => Promise; + }>(); + }); + + it("should preserve generic types", () => { + type Route = PipelineRouteDefinition<"my-route", { cache: number }, string[]>; + + expectTypeOf().toEqualTypeOf<"my-route">(); + }); +}); + +describe("InferRouteId type", () => { + it("should extract id from route definition", () => { + type Route = PipelineRouteDefinition<"line-break", Record, PropertyJson[]>; + expectTypeOf>().toEqualTypeOf<"line-break">(); + }); +}); + +describe("InferRouteOutput type", () => { + it("should extract output type from route definition", () => { + type Route = PipelineRouteDefinition<"id", Record, { custom: true }[]>; + expectTypeOf>().toEqualTypeOf<{ custom: true }[]>(); + }); +}); + +describe("InferRoutesOutput type", () => { + it("should union output types from route array", () => { + type Routes = readonly [ + PipelineRouteDefinition<"a", Record, PropertyJson[]>, + PipelineRouteDefinition<"b", Record, PropertyJson[]>, + ]; + + expectTypeOf>().toEqualTypeOf(); + }); +}); + +describe("PipelineEvent type", () => { + it("should be a union of event types", () => { + expectTypeOf().toMatchTypeOf<{ type: string; timestamp: number }>(); + }); + + it("should include pipeline lifecycle events", () => { + const startEvent: PipelineEvent = { + type: "pipeline:start", + versions: ["16.0.0"], + timestamp: Date.now(), + }; + + const endEvent: PipelineEvent = { + type: "pipeline:end", + durationMs: 100, + timestamp: Date.now(), + }; + + expectTypeOf(startEvent).toMatchTypeOf(); + expectTypeOf(endEvent).toMatchTypeOf(); + }); + + it("should include version events", () => { + const startEvent: PipelineEvent = { + type: "version:start", + version: "16.0.0", + timestamp: Date.now(), + }; + + expectTypeOf(startEvent).toMatchTypeOf(); + }); + + it("should include file events", () => { + const matchedEvent: PipelineEvent = { + type: "file:matched", + file: { version: "16.0.0", dir: "", path: "test.txt", name: "test.txt", ext: ".txt" }, + routeId: "route-id", + timestamp: Date.now(), + }; + + expectTypeOf(matchedEvent).toMatchTypeOf(); + }); + + it("should include error events", () => { + const errorEvent: PipelineEvent = { + type: "error", + error: { scope: "route", message: "Failed", routeId: "id", version: "16.0.0" }, + timestamp: Date.now(), + }; + + expectTypeOf(errorEvent).toMatchTypeOf(); + }); +}); + +describe("PipelineGraphNode type", () => { + it("should have id and type", () => { + expectTypeOf().toMatchTypeOf<{ + id: string; + type: "source" | "artifact" | "file" | "route" | "output"; + }>(); + }); +}); + +describe("PipelineGraphEdge type", () => { + it("should have from, to, and type", () => { + expectTypeOf().toMatchTypeOf<{ + from: string; + to: string; + type: "provides" | "matched" | "parsed" | "resolved" | 
"uses-artifact"; + }>(); + }); +}); + +describe("PipelineGraph type", () => { + it("should have nodes and edges", () => { + expectTypeOf().toMatchTypeOf<{ + nodes: PipelineGraphNode[]; + edges: PipelineGraphEdge[]; + }>(); + }); +}); + +describe("PipelineError type", () => { + it("should have scope and message", () => { + expectTypeOf().toMatchTypeOf<{ + scope: "artifact" | "route" | "file" | "pipeline" | "version"; + message: string; + }>(); + }); + + it("should have optional version", () => { + expectTypeOf().toMatchTypeOf<{ version?: string }>(); + }); + + it("should have optional context fields", () => { + expectTypeOf().toMatchTypeOf<{ + error?: unknown; + file?: FileContext; + routeId?: string; + artifactId?: string; + }>(); + }); +}); + +describe("PipelineSummary type", () => { + it("should have file counts", () => { + expectTypeOf().toMatchTypeOf<{ + versions: string[]; + totalFiles: number; + matchedFiles: number; + skippedFiles: number; + fallbackFiles: number; + totalOutputs: number; + durationMs: number; + }>(); + }); +}); + +describe("PipelineRunResult type", () => { + it("should have data, graph, errors, and summary", () => { + expectTypeOf>().toMatchTypeOf<{ + data: PropertyJson[]; + graph: PipelineGraph; + errors: PipelineError[]; + summary: PipelineSummary; + }>(); + }); + + it("should preserve generic data type", () => { + type CustomOutput = { custom: true }; + expectTypeOf["data"]>().toEqualTypeOf(); + }); +}); diff --git a/packages/pipelines/tsconfig.build.json b/packages/pipelines/tsconfig.build.json new file mode 100644 index 000000000..36c889e0c --- /dev/null +++ b/packages/pipelines/tsconfig.build.json @@ -0,0 +1,5 @@ +{ + "extends": "./tsconfig.json", + "include": ["src"], + "exclude": ["dist", "test"] +} diff --git a/packages/pipelines/tsconfig.json b/packages/pipelines/tsconfig.json new file mode 100644 index 000000000..9c6dd744b --- /dev/null +++ b/packages/pipelines/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "@ucdjs-tooling/tsconfig/base", + "include": [ + "src", + "test" + ], + "exclude": ["dist"] +} diff --git a/packages/pipelines/tsdown.config.ts b/packages/pipelines/tsdown.config.ts new file mode 100644 index 000000000..dee0149e6 --- /dev/null +++ b/packages/pipelines/tsdown.config.ts @@ -0,0 +1,7 @@ +import { createTsdownConfig } from "@ucdjs-tooling/tsdown-config"; + +export default createTsdownConfig({ + entry: [ + "./src/index.ts", + ], +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e1476863a..01694595d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -913,6 +913,43 @@ importers: specifier: catalog:testing version: 4.3.0(vitest@4.0.16) + packages/pipelines: + dependencies: + '@ucdjs-internal/shared': + specifier: workspace:* + version: link:../shared + picomatch: + specifier: catalog:prod + version: 4.0.3 + devDependencies: + '@luxass/eslint-config': + specifier: catalog:linting + version: 6.0.3(@eslint-react/eslint-plugin@2.3.12(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(@vue/compiler-sfc@3.5.25)(eslint-plugin-format@1.2.0(eslint@9.39.2(jiti@2.6.1)))(eslint-plugin-react-hooks@7.0.1(eslint@9.39.2(jiti@2.6.1)))(eslint-plugin-react-refresh@0.4.24(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)(vitest@4.0.16) + '@types/picomatch': + specifier: catalog:types + version: 4.0.2 + '@ucdjs-tooling/tsconfig': + specifier: workspace:* + version: link:../../tooling/tsconfig + '@ucdjs-tooling/tsdown-config': + specifier: workspace:* + version: link:../../tooling/tsdown-config + eslint: + specifier: catalog:linting + 
+        version: 9.39.2(jiti@2.6.1)
+      publint:
+        specifier: catalog:dev
+        version: 0.3.16
+      tsdown:
+        specifier: catalog:dev
+        version: 0.18.4(publint@0.3.16)(synckit@0.11.11)(typescript@5.9.3)
+      tsx:
+        specifier: catalog:dev
+        version: 4.21.0
+      typescript:
+        specifier: catalog:dev
+        version: 5.9.3
+
   packages/schema-gen:
     dependencies:
       '@ai-sdk/openai':