Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Repository Guidelines

## Project Structure & Module Organization
- `rust/lance-graph/` hosts the Rust Cypher engine; keep new modules under `src/` and co-locate helpers inside `query/` or feature-specific submodules.
- `python/src/` contains the PyO3 bridge; `python/python/lance_graph/` holds the pure-Python facade and packaging metadata.
- `python/python/tests/` stores functional tests; mirror new features with targeted cases here and in the corresponding Rust module.
- `examples/` demonstrates Cypher usage; update or add examples when introducing new public APIs.

## Build, Test, and Development Commands
- `cargo check` / `cargo test --all` (run inside `rust/lance-graph`) validate Rust code paths.
- `cargo bench --bench graph_execution` measures performance-critical changes; for a shortened run, pass harness options after `--`, e.g. `cargo bench --bench graph_execution -- --warm-up-time 1`.
- `uv venv --python 3.11 .venv` and `uv pip install -e '.[tests]'` bootstrap the Python workspace.
- `maturin develop` rebuilds the extension after Rust edits; `pytest python/python/tests/ -v` exercises Python bindings.
- `make lint` (in `python/`) runs `ruff`, formatting checks, and `pyright`.

## Coding Style & Naming Conventions
- Format Rust with `cargo fmt --all`; keep modules and functions snake_case, types PascalCase, and reuse `snafu` error patterns.
- Run `cargo clippy --all-targets --all-features` to catch lint regressions.
- Use 4-space indentation in Python; maintain snake_case modules, CamelCase classes, and type-annotated public APIs.
- Apply `ruff format python/` before committing; `ruff check` and `pyright` enforce import hygiene and typing.

## Testing Guidelines
- Add Rust unit tests alongside implementations via `#[cfg(test)]`; prefer focused scenarios over broad integration.
- Python tests belong in `python/python/tests/`; name files `test_*.py` and use markers (`gpu`, `cuda`, `integration`, `slow`) consistently.
- When touching performance-sensitive code, capture representative `cargo bench` or large-table pytest timing notes in the PR.

## Commit & Pull Request Guidelines
- Follow the existing history style (`feat(graph):`, `docs:`, `refactor(query):`), using imperative, ≤72-character subjects.
- Reference issues or discussions when relevant and include brief context in the body.
- PRs should describe scope, list test commands run, mention benchmark deltas when applicable, and highlight impacts on bindings or examples.
4 changes: 4 additions & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ classifiers = [

[tool.maturin]
python-source = "python"
python-packages = ["lance_graph", "knowledge_graph"]

[build-system]
requires = ["maturin>=1.4"]
Expand All @@ -37,6 +38,9 @@ build-backend = "maturin"
tests = ["pytest", "pyarrow>=14", "pandas"]
dev = ["ruff", "pyright"]

[project.scripts]
knowledge_graph = "knowledge_graph.main:main"

[tool.ruff]
lint.select = ["F", "E", "W", "I", "G", "TCH", "PERF", "B019"]

Expand Down
103 changes: 103 additions & 0 deletions python/python/knowledge_graph/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""High-level helpers for working with Lance-backed knowledge graphs."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, Mapping, Optional

import pyarrow as pa
from lance_graph import CypherQuery, GraphConfig

try: # Prefer to import for typing without raising at runtime.
from lance_graph import GraphConfigBuilder
except ImportError: # pragma: no cover - builder is available in normal installs.
GraphConfigBuilder = object # type: ignore[assignment]

TableMapping = Mapping[str, pa.Table]


def _ensure_table(name: str, table: pa.Table) -> pa.Table:
    """Return ``table`` unchanged once it is confirmed to be a ``pyarrow.Table``.

    ``name`` is only used to label the offending dataset in the error message.

    Raises:
        TypeError: If ``table`` is not a ``pyarrow.Table`` instance.
    """
    if isinstance(table, pa.Table):
        return table
    raise TypeError(
        f"Dataset '{name}' must be a pyarrow.Table (got {type(table)!r})"
    )


@dataclass(frozen=True)
class KnowledgeGraph:
    """Immutable pairing of a ``GraphConfig`` with the Arrow tables it queries."""

    config: GraphConfig
    _tables: Dict[str, pa.Table]

    def __init__(self, config: GraphConfig, datasets: TableMapping) -> None:
        """Store ``config`` and a validated copy of ``datasets``.

        Raises:
            TypeError: If any value in ``datasets`` is not a ``pyarrow.Table``.
        """
        # The dataclass is frozen, so fields are written through
        # ``object.__setattr__`` rather than plain attribute assignment.
        object.__setattr__(self, "config", config)
        validated: Dict[str, pa.Table] = {}
        for key, value in datasets.items():
            validated[key] = _ensure_table(key, value)
        object.__setattr__(self, "_tables", validated)

    def run(
        self,
        statement: str,
        *,
        datasets: Optional[TableMapping] = None,
    ):
        """Run a Cypher statement against the registered tables.

        Entries in ``datasets`` (when given and non-empty) replace or extend
        the registered tables for this call only; the stored tables are not
        modified.
        """
        bound_query = CypherQuery(statement).with_config(self.config)
        merged: Dict[str, pa.Table] = self._tables.copy()
        if datasets:
            overrides: Dict[str, pa.Table] = {}
            for key, value in datasets.items():
                overrides[key] = _ensure_table(key, value)
            merged.update(overrides)
        return bound_query.execute(merged)

    def tables(self) -> Dict[str, pa.Table]:
        """Return a new dict holding the same registered table objects."""
        return self._tables.copy()


class KnowledgeGraphBuilder:
    """Collects nodes, relationships, and datasets before building a graph.

    Every ``with_*`` method returns ``self`` so calls can be chained. Each one
    validates its table *before* touching builder state, so a rejected table
    leaves the builder exactly as it was instead of registering a label or
    relationship that has no backing dataset.
    """

    def __init__(self) -> None:
        """Start with an empty config builder and no datasets."""
        builder = GraphConfig.builder()
        self._builder: GraphConfigBuilder = builder  # type: ignore[annotation-unchecked]
        self._datasets: Dict[str, pa.Table] = {}

    def with_node(
        self,
        label: str,
        primary_key: str,
        table: pa.Table,
    ) -> KnowledgeGraphBuilder:
        """Register a node label and the Arrow table stored under that label.

        Raises:
            TypeError: If ``table`` is not a ``pyarrow.Table``; the builder is
                left unchanged in that case.
        """
        # Validate first: a failure must not leave the label half-registered.
        validated = _ensure_table(label, table)
        self._builder = self._builder.with_node_label(label, primary_key)
        self._datasets[label] = validated
        return self

    def with_relationship(
        self,
        name: str,
        source_key: str,
        target_key: str,
        table: pa.Table,
    ) -> KnowledgeGraphBuilder:
        """Register a relationship and the Arrow table stored under its name.

        Raises:
            TypeError: If ``table`` is not a ``pyarrow.Table``; the builder is
                left unchanged in that case.
        """
        # Validate first: a failure must not leave the relationship
        # half-registered.
        validated = _ensure_table(name, table)
        self._builder = self._builder.with_relationship(name, source_key, target_key)
        self._datasets[name] = validated
        return self

    def with_dataset(self, name: str, table: pa.Table) -> KnowledgeGraphBuilder:
        """Attach arbitrary supporting datasets (e.g., reference tables).

        Raises:
            TypeError: If ``table`` is not a ``pyarrow.Table``.
        """
        self._datasets[name] = _ensure_table(name, table)
        return self

    def build(self) -> KnowledgeGraph:
        """Build the config and wrap it with the collected datasets."""
        config = self._builder.build()
        return KnowledgeGraph(config, self._datasets)


__all__ = ["KnowledgeGraph", "KnowledgeGraphBuilder"]
8 changes: 8 additions & 0 deletions python/python/knowledge_graph/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Executable module wrapper for `python -m knowledge_graph`."""

from __future__ import annotations

from .main import main

if __name__ == "__main__":
    # Hand main()'s return value to SystemExit so it becomes the exit status.
    status = main()
    raise SystemExit(status)
110 changes: 110 additions & 0 deletions python/python/knowledge_graph/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Command line interface for the knowledge_graph helpers."""

from __future__ import annotations

import argparse
from pathlib import Path
from typing import Optional, Sequence


def init_graph() -> None:
    """Set up storage for the knowledge graph (placeholder: does nothing yet)."""
    return None


def run_interactive() -> None:
    """Start an interactive command shell (placeholder: does nothing yet)."""
    return None


def execute_query(text: str) -> None:
    """Run one knowledge graph query (placeholder: does nothing yet)."""
    # The argument is accepted so the signature is stable, then discarded.
    _ = text
    return None


def preview_extraction(path: Path) -> None:
    """Show what would be extracted from a text source (placeholder: no-op)."""
    # The argument is accepted so the signature is stable, then discarded.
    _ = path
    return None


def extract_and_add(path: Path) -> None:
    """Extract knowledge and add it to the backing graph (placeholder: no-op)."""
    # The argument is accepted so the signature is stable, then discarded.
    _ = path
    return None


def ask_question(question: str) -> None:
    """Answer a natural-language question from the graph (placeholder: no-op)."""
    # The argument is accepted so the signature is stable, then discarded.
    _ = question
    return None


def _build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
prog="knowledge_graph",
description="Operate the Lance-backed knowledge graph.",
)
group = parser.add_mutually_exclusive_group()
group.add_argument(
"--init",
action="store_true",
help="Initialize the knowledge graph storage.",
)
group.add_argument(
"--extract-preview",
metavar="PATH",
help="Preview extracted entities and relations from a text file.",
)
group.add_argument(
"--extract-and-add",
metavar="PATH",
help="Extract and insert knowledge from a text file.",
)
group.add_argument(
"--ask",
metavar="QUESTION",
help="Ask a natural-language question over the knowledge graph.",
)
parser.add_argument(
"query",
nargs="?",
help="Execute a single Cypher or semantic query.",
)
return parser


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Parse command-line arguments and dispatch to the matching action.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read the
            process arguments.

    Returns:
        ``0`` once the selected action has returned. Usage errors do not
        return: ``parser.error`` reports them and exits via ``SystemExit``.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Value-taking options and the optional positional default to ``None``
    # when absent, so presence is tested with ``is not None`` everywhere. A
    # truthiness test would treat an explicitly supplied empty string
    # (``--ask ""``) as "not given" and silently drop into the interactive
    # shell instead of running the requested action.
    query_given = args.query is not None
    flag_given = (
        args.init
        or args.extract_preview is not None
        or args.extract_and_add is not None
        or args.ask is not None
    )
    if query_given and flag_given:
        parser.error("Query argument cannot be combined with flags.")

    if args.init:
        init_graph()
    elif args.extract_preview is not None:
        preview_extraction(Path(args.extract_preview))
    elif args.extract_and_add is not None:
        extract_and_add(Path(args.extract_and_add))
    elif args.ask is not None:
        ask_question(args.ask)
    elif query_given:
        execute_query(args.query)
    else:
        # Nothing was requested explicitly: fall back to the interactive shell.
        run_interactive()
    return 0


if __name__ == "__main__":
    # Hand main()'s return value to SystemExit so it becomes the exit status.
    status = main()
    raise SystemExit(status)
Loading