Skip to content

Commit 9293a70

Browse files
committed
feat: Add predicate support for node text filtering
1 parent 62d6aff commit 9293a70

40 files changed

+1368
-133
lines changed

AGENTS.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
| `[...]` | Alternation (first match wins) |
3131
| `Name = ...` | Named definition (entrypoint) |
3232
| `(Name)` | Use named expression |
33+
| `(node == "x")` | String predicate (== != ^= $= *=) |
34+
| `(node =~ /x/)` | Regex predicate (=~ !~) |
3335

3436
## Data Model Rules
3537

@@ -99,8 +101,8 @@ Rule: anchor is as strict as its strictest operand.
99101
; WRONG: dot capture syntax
100102
@function.name ; use @function_name
101103
102-
; WRONG: predicates (unsupported)
103-
(id) @x (#eq? @x "foo")
104+
; WRONG: tree-sitter predicate syntax
105+
(id) @x (#eq? @x "foo") ; use (id == "foo") @x
104106
105107
; WRONG: boundary anchors without parent node
106108
{. (a)} ; use (parent {. (a)})

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/plotnik-cli/src/commands/exec.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ pub fn run(args: ExecArgs) {
3535
color: args.color,
3636
});
3737

38-
let vm = VM::builder(&tree).trivia_types(trivia_types).build();
38+
let vm = VM::builder(&source_code, &tree).trivia_types(trivia_types).build();
3939
let effects = match vm.execute(&module, 0, &entrypoint) {
4040
Ok(effects) => effects,
4141
Err(RuntimeError::NoMatch) => {

crates/plotnik-cli/src/commands/trace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ pub fn run(args: TraceArgs) {
3939
color: args.color,
4040
});
4141

42-
let vm = VM::builder(&tree)
42+
let vm = VM::builder(&source_code, &tree)
4343
.trivia_types(trivia_types)
4444
.exec_fuel(args.fuel)
4545
.build();

crates/plotnik-lib/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ crc32fast = "1.4"
2525
memmap2 = "0.9"
2626
plotnik-core.workspace = true
2727
plotnik-langs = { workspace = true, optional = true }
28+
regex-automata = { version = "0.4", features = ["dfa-build", "dfa-search"] }
29+
regex-syntax = "0.8"
2830

2931
[features]
3032
default = ["plotnik-langs"]

crates/plotnik-lib/src/analyze/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,7 @@ pub use link::LinkOutput;
3232
pub use recursion::validate_recursion;
3333
pub use symbol_table::{SymbolTable, UNNAMED_DEF};
3434
pub use type_check::{TypeContext, infer_types, primary_def_name};
35-
pub use validation::{validate_alt_kinds, validate_anchors, validate_empty_constructs};
35+
pub use validation::{
36+
validate_alt_kinds, validate_anchors, validate_empty_constructs, validate_predicates,
37+
};
3638
pub use visitor::{Visitor, walk_expr};

crates/plotnik-lib/src/analyze/validation/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,23 @@
44
//! - Alternation kind consistency (alt_kinds)
55
//! - Anchor placement rules (anchors)
66
//! - Empty constructs (empty_constructs)
7+
//! - Predicate regex patterns (predicates)
78
89
pub mod alt_kinds;
910
pub mod anchors;
1011
pub mod empty_constructs;
12+
pub mod predicates;
1113

1214
#[cfg(test)]
1315
mod alt_kinds_tests;
1416
#[cfg(test)]
1517
mod anchors_tests;
1618
#[cfg(test)]
1719
mod empty_constructs_tests;
20+
#[cfg(test)]
21+
mod predicates_tests;
1822

1923
pub use alt_kinds::validate_alt_kinds;
2024
pub use anchors::validate_anchors;
2125
pub use empty_constructs::validate_empty_constructs;
26+
pub use predicates::validate_predicates;
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
//! Predicate validation.
2+
//!
3+
//! Rejects predicates on non-leaf nodes and empty regex patterns, and validates regex patterns in predicates for unsupported features:
4+
//! - Backreferences (`\1`)
5+
//! - Lookahead/lookbehind (`(?=...)`, `(?!...)`, etc.)
6+
//! - Named captures (`(?P<name>...)`)
7+
8+
use regex_syntax::ast::{self, visit, Ast, GroupKind, Visitor as RegexVisitor};
9+
use rowan::TextRange;
10+
11+
use crate::SourceId;
12+
use crate::analyze::visitor::{Visitor, walk_named_node};
13+
use crate::diagnostics::{DiagnosticKind, Diagnostics};
14+
use crate::parser::{NamedNode, Root};
15+
16+
pub fn validate_predicates(
17+
source_id: SourceId,
18+
source: &str,
19+
ast: &Root,
20+
diag: &mut Diagnostics,
21+
) {
22+
let mut validator = PredicateValidator {
23+
diag,
24+
source_id,
25+
source,
26+
};
27+
validator.visit(ast);
28+
}
29+
30+
struct PredicateValidator<'q, 'd> {
31+
diag: &'d mut Diagnostics,
32+
source_id: SourceId,
33+
source: &'q str,
34+
}
35+
36+
impl Visitor for PredicateValidator<'_, '_> {
37+
fn visit_named_node(&mut self, node: &NamedNode) {
38+
if let Some(pred) = node.predicate() {
39+
// Predicates are only valid on leaf nodes (no children in pattern)
40+
if node.children().next().is_some() {
41+
self.diag
42+
.report(
43+
self.source_id,
44+
DiagnosticKind::PredicateOnNonLeaf,
45+
pred.as_cst().text_range(),
46+
)
47+
.emit();
48+
}
49+
50+
// Validate regex syntax if this is a regex predicate
51+
if let Some(op) = pred.operator()
52+
&& op.is_regex_op()
53+
&& let Some(regex) = pred.regex()
54+
{
55+
self.validate_regex(regex.pattern(self.source), regex.text_range());
56+
}
57+
}
58+
walk_named_node(self, node);
59+
}
60+
}
61+
62+
impl PredicateValidator<'_, '_> {
63+
fn validate_regex(&mut self, pattern: &str, regex_range: TextRange) {
64+
// Reject empty regex patterns
65+
if pattern.is_empty() {
66+
self.diag
67+
.report(self.source_id, DiagnosticKind::EmptyRegex, regex_range)
68+
.emit();
69+
return;
70+
}
71+
72+
// Parse with octal disabled so \1-\9 are backreferences, not octal
73+
let parser_result = ast::parse::ParserBuilder::new()
74+
.octal(false)
75+
.build()
76+
.parse(pattern);
77+
78+
let parsed_ast = match parser_result {
79+
Ok(ast) => ast,
80+
Err(e) => {
81+
let span = self.map_regex_span(e.span(), regex_range);
82+
let report = match e.kind() {
83+
ast::ErrorKind::UnsupportedBackreference => {
84+
self.diag.report(self.source_id, DiagnosticKind::RegexBackreference, span)
85+
}
86+
ast::ErrorKind::UnsupportedLookAround => {
87+
// Skip the opening `(` - point at `?=` / `?!` / `?<=` / `?<!`
88+
use rowan::TextSize;
89+
let adjusted = TextRange::new(span.start() + TextSize::from(1u32), span.end());
90+
self.diag.report(self.source_id, DiagnosticKind::RegexLookaround, adjusted)
91+
}
92+
_ => self
93+
.diag
94+
.report(self.source_id, DiagnosticKind::RegexSyntaxError, span)
95+
.message(format!("{}", e.kind())),
96+
};
97+
report.emit();
98+
return;
99+
}
100+
};
101+
102+
// Walk AST to find named captures
103+
let detector = NamedCaptureDetector {
104+
named_captures: Vec::new(),
105+
};
106+
let detector = visit(&parsed_ast, detector).unwrap();
107+
108+
for capture_span in detector.named_captures {
109+
let span = self.map_regex_span(&capture_span, regex_range);
110+
self.diag
111+
.report(self.source_id, DiagnosticKind::RegexNamedCapture, span)
112+
.emit();
113+
}
114+
}
115+
116+
/// Map a span within the regex pattern to a span in the query source.
117+
fn map_regex_span(&self, regex_span: &ast::Span, regex_range: TextRange) -> TextRange {
118+
// regex_range includes the `/` delimiters, so content starts at +1
119+
let content_start = u32::from(regex_range.start()) + 1;
120+
let start = content_start + regex_span.start.offset as u32;
121+
let end = content_start + regex_span.end.offset as u32;
122+
TextRange::new(start.into(), end.into())
123+
}
124+
}
125+
126+
struct NamedCaptureDetector {
127+
named_captures: Vec<ast::Span>,
128+
}
129+
130+
impl RegexVisitor for NamedCaptureDetector {
131+
type Output = Self;
132+
type Err = std::convert::Infallible;
133+
134+
fn finish(self) -> Result<Self::Output, Self::Err> {
135+
Ok(self)
136+
}
137+
138+
fn visit_pre(&mut self, ast: &Ast) -> Result<(), Self::Err> {
139+
if let Ast::Group(group) = ast
140+
&& let GroupKind::CaptureName { name, .. } = &group.kind
141+
{
142+
// Span for `?P<name>` (skip opening paren, include closing `>`)
143+
let start = ast::Position::new(group.span.start.offset + 1, group.span.start.line, group.span.start.column + 1);
144+
let end = ast::Position::new(name.span.end.offset + 1, name.span.end.line, name.span.end.column + 1);
145+
self.named_captures.push(ast::Span::new(start, end));
146+
}
147+
Ok(())
148+
}
149+
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
use crate::query::QueryAnalyzed;
2+
3+
// A backreference (`\1`) in a regex predicate must make the query invalid and be reported as unsupported.
#[test]
4+
fn backreference_error() {
5+
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /(.)\1/)");
6+
assert!(!q.is_valid());
7+
insta::assert_snapshot!(q.dump_diagnostics(), @r"
8+
error: backreferences are not supported in regex
9+
|
10+
1 | Q = (identifier =~ /(.)\1/)
11+
| ^^
12+
");
13+
}
14+
15+
// A lookahead group (`(?=...)`) must make the query invalid; the snapshot underlines two characters.
#[test]
16+
fn lookahead_error() {
17+
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /foo(?=bar)/)");
18+
assert!(!q.is_valid());
19+
insta::assert_snapshot!(q.dump_diagnostics(), @r"
20+
error: lookahead/lookbehind is not supported in regex
21+
|
22+
1 | Q = (identifier =~ /foo(?=bar)/)
23+
| ^^
24+
");
25+
}
26+
27+
// A lookbehind group (`(?<=...)`) must make the query invalid; the snapshot underlines three characters.
#[test]
28+
fn lookbehind_error() {
29+
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /(?<=foo)bar/)");
30+
assert!(!q.is_valid());
31+
insta::assert_snapshot!(q.dump_diagnostics(), @r"
32+
error: lookahead/lookbehind is not supported in regex
33+
|
34+
1 | Q = (identifier =~ /(?<=foo)bar/)
35+
| ^^^
36+
");
37+
}
38+
39+
// A named capture group (`(?P<name>...)`) must make the query invalid and be reported as unsupported.
#[test]
40+
fn named_capture_error() {
41+
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /(?P<name>foo)/)");
42+
assert!(!q.is_valid());
43+
insta::assert_snapshot!(q.dump_diagnostics(), @r"
44+
error: named captures are not supported in regex
45+
|
46+
1 | Q = (identifier =~ /(?P<name>foo)/)
47+
| ^^^^^^^^
48+
");
49+
}
50+
51+
// A malformed pattern (unclosed `[`) must be reported as a regex syntax error carrying the parser's message.
#[test]
52+
fn syntax_error() {
53+
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /[/)");
54+
assert!(!q.is_valid());
55+
insta::assert_snapshot!(q.dump_diagnostics(), @r"
56+
error: invalid regex syntax: unclosed character class
57+
|
58+
1 | Q = (identifier =~ /[/)
59+
| ^
60+
");
61+
}
62+
63+
// A predicate on a node whose pattern also lists children must be rejected.
#[test]
64+
fn predicate_on_non_leaf() {
65+
let q = QueryAnalyzed::expect(r"Q = (function_declaration == 'foo' (identifier))");
66+
assert!(!q.is_valid());
67+
insta::assert_snapshot!(q.dump_diagnostics(), @r"
68+
error: predicates are only valid on named leaf nodes
69+
|
70+
1 | Q = (function_declaration == 'foo' (identifier))
71+
| ^^^^^^^^
72+
");
73+
}
74+
75+
// An empty regex literal (`//`) must be rejected; the snapshot underlines two characters.
#[test]
76+
fn empty_regex_error() {
77+
let q = QueryAnalyzed::expect(r"Q = (identifier =~ //)");
78+
assert!(!q.is_valid());
79+
insta::assert_snapshot!(q.dump_diagnostics(), @r"
80+
error: empty regex pattern
81+
|
82+
1 | Q = (identifier =~ //)
83+
| ^^
84+
");
85+
}
86+
87+
// A plain anchored regex predicate must leave the query valid.
#[test]
88+
fn valid_regex() {
89+
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /^test_/)");
90+
assert!(q.is_valid());
91+
}
92+
93+
// A string equality predicate must leave the query valid.
#[test]
94+
fn valid_string_predicate() {
95+
let q = QueryAnalyzed::expect(r#"Q = (identifier == "foo")"#);
96+
assert!(q.is_valid());
97+
}

0 commit comments

Comments
 (0)