Skip to content

Commit 8ef290c

Browse files
committed
feat: support multiple string operators
1 parent ce32e7c commit 8ef290c

File tree

5 files changed

+585
-8
lines changed

5 files changed

+585
-8
lines changed

rust/lance-graph/src/ast.rs

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -223,6 +223,21 @@ pub enum BooleanExpression {
223223
expression: ValueExpression,
224224
pattern: String,
225225
},
226+
/// CONTAINS substring matching
227+
Contains {
228+
expression: ValueExpression,
229+
substring: String,
230+
},
231+
/// STARTS WITH prefix matching
232+
StartsWith {
233+
expression: ValueExpression,
234+
prefix: String,
235+
},
236+
/// ENDS WITH suffix matching
237+
EndsWith {
238+
expression: ValueExpression,
239+
suffix: String,
240+
},
226241
/// IS NULL pattern matching
227242
IsNull(ValueExpression),
228243
/// IS NOT NULL pattern matching

rust/lance-graph/src/datafusion_planner/expression.rs

Lines changed: 189 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,17 @@ use datafusion_functions_aggregate::min_max::max;
1313
use datafusion_functions_aggregate::min_max::min;
1414
use datafusion_functions_aggregate::sum::sum;
1515

16+
/// Helper function to create LIKE expressions with consistent settings.
///
/// Builds a non-negated `Expr::Like` node:
/// - `expression` is converted with `to_df_value_expr` and becomes the matched operand;
/// - `pattern` is copied unchanged into a string literal and used as the LIKE pattern;
/// - `case_insensitive` is forwarded to the node as-is.
///
/// No escape character is configured (`escape_char: None`).
///
/// NOTE(review): the CONTAINS / STARTS WITH / ENDS WITH arms of `to_df_boolean_expr`
/// build `pattern` by interpolating raw text around `%`. Any `%` or `_` inside that
/// text is not escaped here, so it presumably acts as a LIKE wildcard rather than a
/// literal character — confirm, and escape at the call sites if literal matching is
/// intended.
17+
fn create_like_expr(expression: &ValueExpression, pattern: &str, case_insensitive: bool) -> Expr {
18+
Expr::Like(datafusion::logical_expr::Like {
19+
negated: false,
20+
expr: Box::new(to_df_value_expr(expression)),
21+
pattern: Box::new(lit(pattern.to_string())),
22+
escape_char: None,
23+
case_insensitive,
24+
})
25+
}
26+
1627
/// Convert BooleanExpression to DataFusion Expr
1728
pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr {
1829
use crate::ast::{BooleanExpression as BE, ComparisonOperator as CO};
@@ -63,13 +74,25 @@ pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr {
6374
BE::Like {
6475
expression,
6576
pattern,
66-
} => Expr::Like(datafusion::logical_expr::Like {
67-
negated: false,
68-
expr: Box::new(to_df_value_expr(expression)),
69-
pattern: Box::new(lit(pattern.clone())),
70-
escape_char: None,
71-
case_insensitive: false,
72-
}),
77+
} => create_like_expr(expression, pattern, false),
78+
BE::Contains {
79+
expression,
80+
substring,
81+
} => {
82+
// CONTAINS is equivalent to LIKE '%substring%'
83+
let pattern = format!("%{}%", substring);
84+
create_like_expr(expression, &pattern, false)
85+
}
86+
BE::StartsWith { expression, prefix } => {
87+
// STARTS WITH is equivalent to LIKE 'prefix%'
88+
let pattern = format!("{}%", prefix);
89+
create_like_expr(expression, &pattern, false)
90+
}
91+
BE::EndsWith { expression, suffix } => {
92+
// ENDS WITH is equivalent to LIKE '%suffix'
93+
let pattern = format!("%{}", suffix);
94+
create_like_expr(expression, &pattern, false)
95+
}
7396
}
7497
}
7598

@@ -472,6 +495,165 @@ mod tests {
472495
assert!(s.contains("p__email"), "Should contain column reference");
473496
}
474497

498+
// Verifies that `BooleanExpression::Contains` on property `p.name` with substring "ali"
// converts to an `Expr::Like` that is non-negated, case-sensitive, has no escape char,
// targets column `p__name`, and carries a literal pattern whose Debug output contains "%ali%".
#[test]
499+
fn test_boolean_expr_contains() {
500+
let expr = BooleanExpression::Contains {
501+
expression: ValueExpression::Property(PropertyRef {
502+
variable: "p".into(),
503+
property: "name".into(),
504+
}),
505+
substring: "ali".into(),
506+
};
507+
508+
if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) {
509+
assert!(!like_expr.negated, "Should not be negated");
510+
assert!(!like_expr.case_insensitive, "Should be case sensitive");
511+
assert_eq!(like_expr.escape_char, None, "Should have no escape char");
512+
513+
// Check the matched operand is the column `p__name`
514+
match *like_expr.expr {
515+
Expr::Column(ref col_expr) => {
516+
assert_eq!(col_expr.name(), "p__name");
517+
}
518+
other => panic!("Expected column expression, got {:?}", other),
519+
}
520+
521+
// Check the pattern literal's Debug rendering contains '%ali%'
522+
match *like_expr.pattern {
523+
Expr::Literal(ref scalar, _) => {
524+
let s = format!("{:?}", scalar);
525+
assert!(s.contains("%ali%"), "Pattern should be '%ali%', got: {}", s);
526+
}
527+
other => panic!("Expected literal pattern, got {:?}", other),
528+
}
529+
} else {
530+
panic!("Expected Like expression");
531+
}
532+
}
533+
534+
// Verifies that `BooleanExpression::StartsWith` on property `p.email` with prefix "admin"
// converts to a non-negated, case-sensitive `Expr::Like` over column `p__email` whose
// literal pattern's Debug output contains "admin%".
#[test]
535+
fn test_boolean_expr_starts_with() {
536+
let expr = BooleanExpression::StartsWith {
537+
expression: ValueExpression::Property(PropertyRef {
538+
variable: "p".into(),
539+
property: "email".into(),
540+
}),
541+
prefix: "admin".into(),
542+
};
543+
544+
if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) {
545+
assert!(!like_expr.negated, "Should not be negated");
546+
assert!(!like_expr.case_insensitive, "Should be case sensitive");
547+
548+
// Check the matched operand is the column `p__email`
549+
match *like_expr.expr {
550+
Expr::Column(ref col_expr) => {
551+
assert_eq!(col_expr.name(), "p__email");
552+
}
553+
other => panic!("Expected column expression, got {:?}", other),
554+
}
555+
556+
// Check the pattern literal's Debug rendering contains 'admin%'
557+
// NOTE(review): this is a substring check, so a pattern of '%admin%' would also
558+
// pass; it does not prove the absence of a leading wildcard.
559+
match *like_expr.pattern {
558+
Expr::Literal(ref scalar, _) => {
559+
let s = format!("{:?}", scalar);
560+
assert!(
561+
s.contains("admin%"),
562+
"Pattern should be 'admin%', got: {}",
563+
s
564+
);
565+
}
566+
other => panic!("Expected literal pattern, got {:?}", other),
567+
}
568+
} else {
569+
panic!("Expected Like expression");
570+
}
571+
}
572+
573+
// Verifies that `BooleanExpression::EndsWith` on property `p.email` with suffix
// "@example.com" converts to a non-negated, case-sensitive `Expr::Like` over column
// `p__email` whose literal pattern's Debug output contains "%@example.com".
#[test]
574+
fn test_boolean_expr_ends_with() {
575+
let expr = BooleanExpression::EndsWith {
576+
expression: ValueExpression::Property(PropertyRef {
577+
variable: "p".into(),
578+
property: "email".into(),
579+
}),
580+
suffix: "@example.com".into(),
581+
};
582+
583+
if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) {
584+
assert!(!like_expr.negated, "Should not be negated");
585+
assert!(!like_expr.case_insensitive, "Should be case sensitive");
586+
587+
// Check the matched operand is the column `p__email`
588+
match *like_expr.expr {
589+
Expr::Column(ref col_expr) => {
590+
assert_eq!(col_expr.name(), "p__email");
591+
}
592+
other => panic!("Expected column expression, got {:?}", other),
593+
}
594+
595+
// Check the pattern literal's Debug rendering contains '%@example.com'
596+
// NOTE(review): this is a substring check, so a pattern with an extra trailing
597+
// wildcard ('%@example.com%') would also pass.
598+
match *like_expr.pattern {
597+
Expr::Literal(ref scalar, _) => {
598+
let s = format!("{:?}", scalar);
599+
assert!(
600+
s.contains("%@example.com"),
601+
"Pattern should be '%@example.com', got: {}",
602+
s
603+
);
604+
}
605+
other => panic!("Expected literal pattern, got {:?}", other),
606+
}
607+
} else {
608+
panic!("Expected Like expression");
609+
}
610+
}
611+
612+
// Focused check that a CONTAINS conversion yields a `Like` node with
// `case_insensitive == false`; nothing else about the node is inspected here.
#[test]
613+
fn test_boolean_expr_contains_case_sensitivity() {
614+
// Test that CONTAINS is case-sensitive (case_insensitive = false)
615+
let expr = BooleanExpression::Contains {
616+
expression: ValueExpression::Property(PropertyRef {
617+
variable: "p".into(),
618+
property: "name".into(),
619+
}),
620+
substring: "Test".into(),
621+
};
622+
623+
if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) {
624+
assert!(
625+
!like_expr.case_insensitive,
626+
"CONTAINS should be case-sensitive by default"
627+
);
628+
} else {
629+
panic!("Expected Like expression");
630+
}
631+
}
632+
633+
// Verifies that CONTAINS applied to a bare `ValueExpression::Variable("name")` converts to
// a `Like` node whose matched operand is a column named exactly "name".
// Only the Contains variant is exercised here.
#[test]
634+
fn test_boolean_expr_string_operators_with_variable() {
635+
// Test that string operators work with variable references, not just properties
636+
let expr = BooleanExpression::Contains {
637+
expression: ValueExpression::Variable("name".into()),
638+
substring: "test".into(),
639+
};
640+
641+
if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) {
642+
match *like_expr.expr {
643+
Expr::Column(ref col_expr) => {
644+
assert_eq!(
645+
col_expr.name(),
646+
"name",
647+
"Should reference variable directly"
648+
);
649+
}
650+
other => panic!("Expected column expression, got {:?}", other),
651+
}
652+
} else {
653+
panic!("Expected Like expression");
654+
}
655+
}
656+
475657
// ========================================================================
476658
// Unit tests for to_df_value_expr()
477659
// ========================================================================

0 commit comments

Comments
 (0)