diff --git a/rust/lance-graph/src/lib.rs b/rust/lance-graph/src/lib.rs index fcc5251..4d9bc7a 100644 --- a/rust/lance-graph/src/lib.rs +++ b/rust/lance-graph/src/lib.rs @@ -45,6 +45,7 @@ pub mod parser; pub mod query; pub mod query_processor; pub mod semantic; +pub mod simple_executor; pub mod source_catalog; pub mod sql_converter; diff --git a/rust/lance-graph/src/query.rs b/rust/lance-graph/src/query.rs index 41dba16..49f4d35 100644 --- a/rust/lance-graph/src/query.rs +++ b/rust/lance-graph/src/query.rs @@ -8,15 +8,11 @@ use crate::config::GraphConfig; use crate::error::{GraphError, Result}; use crate::logical_plan::LogicalPlanner; use crate::parser::parse_cypher_query; +use crate::simple_executor::{ + to_df_boolean_expr_simple, to_df_order_by_expr_simple, to_df_value_expr_simple, PathExecutor, +}; use std::collections::HashMap; -mod path_executor; -use self::path_executor::PathExecutor; -mod aliases; -mod clauses; -mod expr; -mod simple_executor; - /// Execution strategy for Cypher queries #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum ExecutionStrategy { @@ -723,9 +719,7 @@ impl CypherQuery { // Apply WHERE if present (limited support: simple comparisons on a single column) if let Some(where_clause) = &self.ast.where_clause { - if let Some(filter_expr) = - simple_executor::to_df_boolean_expr_simple(&where_clause.expression) - { + if let Some(filter_expr) = to_df_boolean_expr_simple(&where_clause.expression) { df = df.filter(filter_expr).map_err(|e| GraphError::PlanError { message: format!("Failed to apply filter: {}", e), location: snafu::Location::new(file!(), line!(), column!()), @@ -739,7 +733,7 @@ impl CypherQuery { .return_clause .items .iter() - .map(|item| simple_executor::to_df_value_expr_simple(&item.expression)) + .map(|item| to_df_value_expr_simple(&item.expression)) .collect(); if !proj_exprs.is_empty() { df = df.select(proj_exprs).map_err(|e| GraphError::PlanError { @@ -758,7 +752,7 @@ impl CypherQuery { // Apply ORDER BY if present if let Some(order_by) = &self.ast.order_by { - let sort_expr = simple_executor::to_df_order_by_expr_simple(&order_by.items); + let sort_expr = to_df_order_by_expr_simple(&order_by.items); df = df.sort(sort_expr).map_err(|e| GraphError::PlanError { message: format!("Failed to apply ORDER BY: {}", e), location: snafu::Location::new(file!(), line!(), column!()), diff --git a/rust/lance-graph/src/query/expr.rs b/rust/lance-graph/src/query/expr.rs deleted file mode 100644 index d7b9f3f..0000000 --- a/rust/lance-graph/src/query/expr.rs +++ /dev/null @@ -1,74 +0,0 @@ -pub(super) fn to_df_boolean_expr_with_vars( - expr: &crate::ast::BooleanExpression, - qualify: &F, -) -> Option -where - F: Fn(&str, &str) -> String, -{ - use crate::ast::{BooleanExpression as BE, ComparisonOperator as CO, ValueExpression as VE}; - use datafusion::logical_expr::{col, Expr, Operator}; - match expr { - BE::Comparison { - left, - operator, - right, - } => { - let (var, prop, lit_expr) = match (left, right) { - (VE::Property(p), VE::Literal(val)) => { - (p.variable.as_str(), p.property.as_str(), to_df_literal(val)) - } - (VE::Literal(val), VE::Property(p)) => { - (p.variable.as_str(), p.property.as_str(), to_df_literal(val)) - } - _ => return None, - }; - let qualified = qualify(var, prop); - let op = match operator { - CO::Equal => Operator::Eq, - CO::NotEqual => Operator::NotEq, - CO::LessThan => Operator::Lt, - CO::LessThanOrEqual => Operator::LtEq, - CO::GreaterThan => Operator::Gt, - CO::GreaterThanOrEqual => Operator::GtEq, - }; - Some(Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr { - left: Box::new(col(&qualified)), - op, - right: Box::new(lit_expr), - })) - } - BE::And(l, r) => Some(datafusion::logical_expr::Expr::BinaryExpr( - datafusion::logical_expr::BinaryExpr { - left: Box::new(to_df_boolean_expr_with_vars(l, qualify)?), - op: Operator::And, - right: Box::new(to_df_boolean_expr_with_vars(r, qualify)?), - }, - )), - BE::Or(l, r) => Some(datafusion::logical_expr::Expr::BinaryExpr( - datafusion::logical_expr::BinaryExpr { - left: Box::new(to_df_boolean_expr_with_vars(l, qualify)?), - op: Operator::Or, - right: Box::new(to_df_boolean_expr_with_vars(r, qualify)?), - }, - )), - BE::Not(inner) => Some(datafusion::logical_expr::Expr::Not(Box::new( - to_df_boolean_expr_with_vars(inner, qualify)?, - ))), - _ => None, - } -} - -pub(super) fn to_df_literal(val: &crate::ast::PropertyValue) -> datafusion::logical_expr::Expr { - use datafusion::logical_expr::lit; - match val { - crate::ast::PropertyValue::String(s) => lit(s.clone()), - crate::ast::PropertyValue::Integer(i) => lit(*i), - crate::ast::PropertyValue::Float(f) => lit(*f), - crate::ast::PropertyValue::Boolean(b) => lit(*b), - crate::ast::PropertyValue::Null => { - datafusion::logical_expr::Expr::Literal(datafusion::scalar::ScalarValue::Null, None) - } - crate::ast::PropertyValue::Parameter(_) => lit(0), - crate::ast::PropertyValue::Property(prop) => datafusion::logical_expr::col(&prop.property), - } -} diff --git a/rust/lance-graph/src/query/aliases.rs b/rust/lance-graph/src/simple_executor/aliases.rs similarity index 55% rename from rust/lance-graph/src/query/aliases.rs rename to rust/lance-graph/src/simple_executor/aliases.rs index 785cea2..d53f8e7 100644 --- a/rust/lance-graph/src/query/aliases.rs +++ b/rust/lance-graph/src/simple_executor/aliases.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + pub(super) fn qualify_alias_property(alias: &str, property: &str) -> String { format!("{}__{}", alias, property) } diff --git a/rust/lance-graph/src/query/clauses.rs b/rust/lance-graph/src/simple_executor/clauses.rs similarity index 95% rename from rust/lance-graph/src/query/clauses.rs rename to rust/lance-graph/src/simple_executor/clauses.rs index 82dd6d5..493ca10 100644 --- a/rust/lance-graph/src/query/clauses.rs +++ b/rust/lance-graph/src/simple_executor/clauses.rs @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + use crate::error::Result; pub(super) fn apply_where_with_qualifier( @@ -5,8 +8,8 @@ pub(super) fn apply_where_with_qualifier( ast: &crate::ast::CypherQuery, qualify: &dyn Fn(&str, &str) -> String, ) -> Result { + use super::expr::to_df_boolean_expr_with_vars; use crate::error::GraphError; - use crate::query::expr::to_df_boolean_expr_with_vars; if let Some(where_clause) = &ast.where_clause { if let Some(expr) = to_df_boolean_expr_with_vars(&where_clause.expression, &|v, p| qualify(v, p)) diff --git a/rust/lance-graph/src/query/simple_executor.rs b/rust/lance-graph/src/simple_executor/expr.rs similarity index 51% rename from rust/lance-graph/src/query/simple_executor.rs rename to rust/lance-graph/src/simple_executor/expr.rs index 5a95bf0..1f27ba7 100644 --- a/rust/lance-graph/src/query/simple_executor.rs +++ b/rust/lance-graph/src/simple_executor/expr.rs @@ -1,14 +1,88 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -//! Helper functions for simple single-table query execution +//! Expression translation helpers for the simple executor + +pub(super) fn to_df_boolean_expr_with_vars( + expr: &crate::ast::BooleanExpression, + qualify: &F, +) -> Option +where + F: Fn(&str, &str) -> String, +{ + use crate::ast::{BooleanExpression as BE, ComparisonOperator as CO, ValueExpression as VE}; + use datafusion::logical_expr::{col, Expr, Operator}; + match expr { + BE::Comparison { + left, + operator, + right, + } => { + let (var, prop, lit_expr) = match (left, right) { + (VE::Property(p), VE::Literal(val)) => { + (p.variable.as_str(), p.property.as_str(), to_df_literal(val)) + } + (VE::Literal(val), VE::Property(p)) => { + (p.variable.as_str(), p.property.as_str(), to_df_literal(val)) + } + _ => return None, + }; + let qualified = qualify(var, prop); + let op = match operator { + CO::Equal => Operator::Eq, + CO::NotEqual => Operator::NotEq, + CO::LessThan => Operator::Lt, + CO::LessThanOrEqual => Operator::LtEq, + CO::GreaterThan => Operator::Gt, + CO::GreaterThanOrEqual => Operator::GtEq, + }; + Some(Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr { + left: Box::new(col(&qualified)), + op, + right: Box::new(lit_expr), + })) + } + BE::And(l, r) => Some(datafusion::logical_expr::Expr::BinaryExpr( + datafusion::logical_expr::BinaryExpr { + left: Box::new(to_df_boolean_expr_with_vars(l, qualify)?), + op: Operator::And, + right: Box::new(to_df_boolean_expr_with_vars(r, qualify)?), + }, + )), + BE::Or(l, r) => Some(datafusion::logical_expr::Expr::BinaryExpr( + datafusion::logical_expr::BinaryExpr { + left: Box::new(to_df_boolean_expr_with_vars(l, qualify)?), + op: Operator::Or, + right: Box::new(to_df_boolean_expr_with_vars(r, qualify)?), + }, + )), + BE::Not(inner) => Some(datafusion::logical_expr::Expr::Not(Box::new( + to_df_boolean_expr_with_vars(inner, qualify)?, + ))), + _ => None, + } +} + +pub(super) fn to_df_literal(val: &crate::ast::PropertyValue) -> datafusion::logical_expr::Expr { + use datafusion::logical_expr::lit; + match val { + crate::ast::PropertyValue::String(s) => lit(s.clone()), + crate::ast::PropertyValue::Integer(i) => lit(*i), + crate::ast::PropertyValue::Float(f) => lit(*f), + crate::ast::PropertyValue::Boolean(b) => lit(*b), + crate::ast::PropertyValue::Null => { + datafusion::logical_expr::Expr::Literal(datafusion::scalar::ScalarValue::Null, None) + } + crate::ast::PropertyValue::Parameter(_) => lit(0), + crate::ast::PropertyValue::Property(prop) => datafusion::logical_expr::col(&prop.property), + } +} /// Minimal translator for simple boolean expressions into DataFusion Expr -pub(super) fn to_df_boolean_expr_simple( +pub(crate) fn to_df_boolean_expr_simple( expr: &crate::ast::BooleanExpression, ) -> Option { use crate::ast::{BooleanExpression as BE, ComparisonOperator as CO, ValueExpression as VE}; - use crate::query::expr::to_df_literal; use datafusion::logical_expr::{col, Expr, Operator}; match expr { BE::Comparison { @@ -66,7 +140,7 @@ pub(super) fn to_df_boolean_expr_simple( } /// Build ORDER BY expressions for simple queries -pub(super) fn to_df_order_by_expr_simple( +pub(crate) fn to_df_order_by_expr_simple( items: &[crate::ast::OrderByItem], ) -> Vec { use datafusion::logical_expr::SortExpr; @@ -85,11 +159,10 @@ pub(super) fn to_df_order_by_expr_simple( } /// Build value expressions for simple queries -pub(super) fn to_df_value_expr_simple( +pub(crate) fn to_df_value_expr_simple( expr: &crate::ast::ValueExpression, ) -> datafusion::logical_expr::Expr { use crate::ast::ValueExpression as VE; - use crate::query::expr::to_df_literal; use datafusion::logical_expr::{col, lit}; match expr { VE::Property(prop) => col(&prop.property), diff --git a/rust/lance-graph/src/simple_executor/mod.rs b/rust/lance-graph/src/simple_executor/mod.rs new file mode 100644 index 0000000..1ea2484 --- /dev/null +++ b/rust/lance-graph/src/simple_executor/mod.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Simple single-table query executor with limited Cypher feature support +//! +//! This module provides a lightweight execution strategy for basic Cypher queries +//! that don't require the full DataFusion planner. It supports: +//! - Single-table scans with property filters +//! - Multi-hop path patterns via join chains +//! - Basic projections, DISTINCT, ORDER BY, SKIP, and LIMIT + +mod aliases; +mod clauses; +mod expr; +mod path_executor; + +pub(crate) use expr::{ + to_df_boolean_expr_simple, to_df_order_by_expr_simple, to_df_value_expr_simple, +}; +pub(crate) use path_executor::PathExecutor; diff --git a/rust/lance-graph/src/query/path_executor.rs b/rust/lance-graph/src/simple_executor/path_executor.rs similarity index 97% rename from rust/lance-graph/src/query/path_executor.rs rename to rust/lance-graph/src/simple_executor/path_executor.rs index 21e3000..3353d29 100644 --- a/rust/lance-graph/src/query/path_executor.rs +++ b/rust/lance-graph/src/simple_executor/path_executor.rs @@ -1,8 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + use crate::error::{GraphError, Result}; use datafusion::logical_expr::JoinType; // Internal helper that plans and executes a single path by chaining joins. -pub(super) struct PathExecutor<'a> { +pub(crate) struct PathExecutor<'a> { pub(super) ctx: &'a datafusion::prelude::SessionContext, pub(super) path: &'a crate::ast::PathPattern, pub(super) start_label: &'a str, @@ -22,7 +25,7 @@ struct SegMeta<'a> { } impl<'a> PathExecutor<'a> { - pub(super) fn new( + pub(crate) fn new( ctx: &'a datafusion::prelude::SessionContext, cfg: &'a crate::config::GraphConfig, path: &'a crate::ast::PathPattern, @@ -174,7 +177,7 @@ impl<'a> PathExecutor<'a> { }) } - pub(super) async fn build_chain(&self) -> Result { + pub(crate) async fn build_chain(&self) -> Result { // Start node let mut df = self .open_aliased(self.start_label, &self.start_alias) @@ -282,7 +285,7 @@ impl<'a> PathExecutor<'a> { None } - pub(super) fn apply_where( + pub(crate) fn apply_where( &self, df: datafusion::dataframe::DataFrame, ast: &crate::ast::CypherQuery, @@ -293,7 +296,7 @@ impl<'a> PathExecutor<'a> { }) } - pub(super) fn apply_return( + pub(crate) fn apply_return( &self, df: datafusion::dataframe::DataFrame, ast: &crate::ast::CypherQuery,