Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
| `[...]` | Alternation (first match wins) |
| `Name = ...` | Named definition (entrypoint) |
| `(Name)` | Use named expression |
| `(node == "x")` | String predicate (== != ^= $= *=) |
| `(node =~ /x/)` | Regex predicate (=~ !~) |

## Data Model Rules

Expand Down Expand Up @@ -99,8 +101,8 @@ Rule: anchor is as strict as its strictest operand.
; WRONG: dot capture syntax
@function.name ; use @function_name

; WRONG: predicates (unsupported)
(id) @x (#eq? @x "foo")
; WRONG: tree-sitter predicate syntax
(id) @x (#eq? @x "foo") ; use (id == "foo") @x

; WRONG: boundary anchors without parent node
{. (a)} ; use (parent {. (a)})
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/plotnik-cli/src/commands/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub fn run(args: ExecArgs) {
color: args.color,
});

let vm = VM::builder(&tree).trivia_types(trivia_types).build();
let vm = VM::builder(&source_code, &tree).trivia_types(trivia_types).build();
let effects = match vm.execute(&module, 0, &entrypoint) {
Ok(effects) => effects,
Err(RuntimeError::NoMatch) => {
Expand Down
2 changes: 1 addition & 1 deletion crates/plotnik-cli/src/commands/trace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub fn run(args: TraceArgs) {
color: args.color,
});

let vm = VM::builder(&tree)
let vm = VM::builder(&source_code, &tree)
.trivia_types(trivia_types)
.exec_fuel(args.fuel)
.build();
Expand Down
2 changes: 2 additions & 0 deletions crates/plotnik-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ crc32fast = "1.4"
memmap2 = "0.9"
plotnik-core.workspace = true
plotnik-langs = { workspace = true, optional = true }
regex-automata = { version = "0.4", features = ["dfa-build", "dfa-search"] }
regex-syntax = "0.8"

[features]
default = ["plotnik-langs"]
Expand Down
15 changes: 15 additions & 0 deletions crates/plotnik-lib/src/analyze/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,21 @@ impl<'a, 'q> Linker<'a, 'q> {
Expr::NamedNode(node) => {
let child_ctx = self.make_node_context(node);

// Predicates are only valid on leaf nodes (grammar check)
if let Some(pred) = node.predicate()
&& let Some(ctx) = &child_ctx
&& (!self.lang.valid_child_types(ctx.parent_id).is_empty()
|| !self.lang.fields_for_node_type(ctx.parent_id).is_empty())
{
self.diagnostics
.report(
self.source_id,
DiagnosticKind::PredicateOnNonLeaf,
pred.as_cst().text_range(),
)
.emit();
}

for child in node.children() {
if let Expr::FieldExpr(f) = &child {
self.validate_field_expr(f, child_ctx.as_ref(), visited);
Expand Down
20 changes: 20 additions & 0 deletions crates/plotnik-lib/src/analyze/link_tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,26 @@
use crate::Query;
use indoc::indoc;

// A predicate on a node type that can contain children must fail linking;
// the diagnostic points at the predicate itself.
#[test]
fn predicate_on_non_leaf() {
let input = r"Q = (function_declaration == 'foo')";

let res = Query::expect_invalid_linking(input);

insta::assert_snapshot!(res, @r"
error: predicates match text content, but this node can contain children
|
1 | Q = (function_declaration == 'foo')
| ^^^^^^^^
");
}

/// A string predicate on `identifier` is expected to link without errors.
#[test]
fn predicate_on_leaf_valid() {
    Query::expect_valid_linking(r#"Q = (identifier == "foo")"#);
}

#[test]
fn valid_query_with_field() {
let input = indoc! {r#"
Expand Down
4 changes: 3 additions & 1 deletion crates/plotnik-lib/src/analyze/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,7 @@ pub use link::LinkOutput;
pub use recursion::validate_recursion;
pub use symbol_table::{SymbolTable, UNNAMED_DEF};
pub use type_check::{TypeContext, infer_types, primary_def_name};
pub use validation::{validate_alt_kinds, validate_anchors, validate_empty_constructs};
pub use validation::{
validate_alt_kinds, validate_anchors, validate_empty_constructs, validate_predicates,
};
pub use visitor::{Visitor, walk_expr};
5 changes: 5 additions & 0 deletions crates/plotnik-lib/src/analyze/validation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
//! - Alternation kind consistency (alt_kinds)
//! - Anchor placement rules (anchors)
//! - Empty constructs (empty_constructs)
//! - Predicate regex patterns (predicates)

pub mod alt_kinds;
pub mod anchors;
pub mod empty_constructs;
pub mod predicates;

#[cfg(test)]
mod alt_kinds_tests;
#[cfg(test)]
mod anchors_tests;
#[cfg(test)]
mod empty_constructs_tests;
#[cfg(test)]
mod predicates_tests;

pub use alt_kinds::validate_alt_kinds;
pub use anchors::validate_anchors;
pub use empty_constructs::validate_empty_constructs;
pub use predicates::validate_predicates;
137 changes: 137 additions & 0 deletions crates/plotnik-lib/src/analyze/validation/predicates.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
//! Predicate validation.
//!
//! Validates regex patterns in regex predicates (`=~`, `!~`) and reports:
//! - Empty patterns (`//`)
//! - Regex syntax errors
//! - Backreferences (`\1`)
//! - Lookahead/lookbehind (`(?=...)`, `(?!...)`, etc.)
//! - Named captures (`(?P<name>...)`)

use regex_syntax::ast::{self, visit, Ast, GroupKind, Visitor as RegexVisitor};
use rowan::TextRange;

use crate::SourceId;
use crate::analyze::visitor::{Visitor, walk_named_node};
use crate::diagnostics::{DiagnosticKind, Diagnostics};
use crate::parser::{NamedNode, Root};

/// Walks the query AST and validates the regex predicates it finds.
///
/// `source` is the query text that regex patterns are read from, `source_id`
/// is attached to every diagnostic, and problems are reported into `diag`.
pub fn validate_predicates(
    source_id: SourceId,
    source: &str,
    ast: &Root,
    diag: &mut Diagnostics,
) {
    PredicateValidator {
        source,
        source_id,
        diag,
    }
    .visit(ast);
}

/// AST visitor that validates the regex pattern of each regex predicate it visits.
struct PredicateValidator<'q, 'd> {
// Sink that receives every diagnostic reported during validation.
diag: &'d mut Diagnostics,
// Source identifier passed along with each reported diagnostic.
source_id: SourceId,
// Query text; handed to `pattern()` to obtain a regex's pattern string.
source: &'q str,
}

impl Visitor for PredicateValidator<'_, '_> {
    /// Validates the node's regex predicate (if it has one), then descends
    /// into the node via `walk_named_node`.
    fn visit_named_node(&mut self, node: &NamedNode) {
        if let Some(pred) = node.predicate() {
            // Only predicates using a regex operator carry a pattern to check.
            let uses_regex_op = pred.operator().is_some_and(|op| op.is_regex_op());
            if uses_regex_op {
                if let Some(regex) = pred.regex() {
                    self.validate_regex(regex.pattern(self.source), regex.text_range());
                }
            }
        }

        walk_named_node(self, node);
    }
}

impl PredicateValidator<'_, '_> {
fn validate_regex(&mut self, pattern: &str, regex_range: TextRange) {
// Reject empty regex patterns
if pattern.is_empty() {
self.diag
.report(self.source_id, DiagnosticKind::EmptyRegex, regex_range)
.emit();
return;
}

// Parse with octal disabled so \1-\9 are backreferences, not octal
let parser_result = ast::parse::ParserBuilder::new()
.octal(false)
.build()
.parse(pattern);

let parsed_ast = match parser_result {
Ok(ast) => ast,
Err(e) => {
let span = self.map_regex_span(e.span(), regex_range);
let report = match e.kind() {
ast::ErrorKind::UnsupportedBackreference => {
self.diag.report(self.source_id, DiagnosticKind::RegexBackreference, span)
}
ast::ErrorKind::UnsupportedLookAround => {
// Skip the opening `(` - point at `?=` / `?!` / `?<=` / `?<!`
use rowan::TextSize;
let adjusted = TextRange::new(span.start() + TextSize::from(1u32), span.end());
self.diag.report(self.source_id, DiagnosticKind::RegexLookaround, adjusted)
}
_ => self
.diag
.report(self.source_id, DiagnosticKind::RegexSyntaxError, span)
.message(format!("{}", e.kind())),
};
report.emit();
return;
}
};

// Walk AST to find named captures
let detector = NamedCaptureDetector {
named_captures: Vec::new(),
};
let detector = visit(&parsed_ast, detector).unwrap();

for capture_span in detector.named_captures {
let span = self.map_regex_span(&capture_span, regex_range);
self.diag
.report(self.source_id, DiagnosticKind::RegexNamedCapture, span)
.emit();
}
}

/// Map a span within the regex pattern to a span in the query source.
fn map_regex_span(&self, regex_span: &ast::Span, regex_range: TextRange) -> TextRange {
// regex_range includes the `/` delimiters, so content starts at +1
let content_start = u32::from(regex_range.start()) + 1;
let start = content_start + regex_span.start.offset as u32;
let end = content_start + regex_span.end.offset as u32;
TextRange::new(start.into(), end.into())
}
}

/// Regex AST visitor that records where each named capture group is introduced.
struct NamedCaptureDetector {
    /// One span per named group, starting just after the opening `(` and
    /// ending just after the capture name's closing `>`.
    named_captures: Vec<ast::Span>,
}

impl RegexVisitor for NamedCaptureDetector {
    type Output = Self;
    type Err = std::convert::Infallible;

    /// Hands the detector itself back so the caller can read the collected spans.
    fn finish(self) -> Result<Self::Output, Self::Err> {
        Ok(self)
    }

    /// Records a span for `ast` when it is a named capture group; ignores
    /// every other node.
    fn visit_pre(&mut self, ast: &Ast) -> Result<(), Self::Err> {
        let Ast::Group(group) = ast else {
            return Ok(());
        };
        let GroupKind::CaptureName { name, .. } = &group.kind else {
            return Ok(());
        };

        // Span for `?P<name>` (skip opening paren, include closing `>`):
        // both endpoints move one character forward on the same line.
        let advance =
            |pos: &ast::Position| ast::Position::new(pos.offset + 1, pos.line, pos.column + 1);
        let header = ast::Span::new(advance(&group.span.start), advance(&name.span.end));
        self.named_captures.push(header);
        Ok(())
    }
}
85 changes: 85 additions & 0 deletions crates/plotnik-lib/src/analyze/validation/predicates_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use crate::query::QueryAnalyzed;

// A backreference (`\1`) in a regex predicate makes the query invalid;
// the diagnostic points at the backreference.
#[test]
fn backreference_error() {
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /(.)\1/)");
assert!(!q.is_valid());
insta::assert_snapshot!(q.dump_diagnostics(), @r"
error: backreferences are not supported in regex
|
1 | Q = (identifier =~ /(.)\1/)
| ^^
");
}

// A lookahead group (`(?=...)`) makes the query invalid; the diagnostic
// points at `?=`, not at the opening parenthesis.
#[test]
fn lookahead_error() {
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /foo(?=bar)/)");
assert!(!q.is_valid());
insta::assert_snapshot!(q.dump_diagnostics(), @r"
error: lookahead/lookbehind is not supported in regex
|
1 | Q = (identifier =~ /foo(?=bar)/)
| ^^
");
}

// A lookbehind group (`(?<=...)`) makes the query invalid; the diagnostic
// points at `?<=`, not at the opening parenthesis.
#[test]
fn lookbehind_error() {
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /(?<=foo)bar/)");
assert!(!q.is_valid());
insta::assert_snapshot!(q.dump_diagnostics(), @r"
error: lookahead/lookbehind is not supported in regex
|
1 | Q = (identifier =~ /(?<=foo)bar/)
| ^^^
");
}

// A named capture group makes the query invalid; the diagnostic covers
// `?P<name>` (everything after the opening parenthesis through `>`).
#[test]
fn named_capture_error() {
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /(?P<name>foo)/)");
assert!(!q.is_valid());
insta::assert_snapshot!(q.dump_diagnostics(), @r"
error: named captures are not supported in regex
|
1 | Q = (identifier =~ /(?P<name>foo)/)
| ^^^^^^^^
");
}

// A malformed pattern (unclosed `[`) is reported as a regex syntax error
// whose message includes the parser's description.
#[test]
fn syntax_error() {
let q = QueryAnalyzed::expect(r"Q = (identifier =~ /[/)");
assert!(!q.is_valid());
insta::assert_snapshot!(q.dump_diagnostics(), @r"
error: invalid regex syntax: unclosed character class
|
1 | Q = (identifier =~ /[/)
| ^
");
}

// An empty pattern (`//`) makes the query invalid; the diagnostic covers
// both `/` delimiters.
#[test]
fn empty_regex_error() {
let q = QueryAnalyzed::expect(r"Q = (identifier =~ //)");
assert!(!q.is_valid());
insta::assert_snapshot!(q.dump_diagnostics(), @r"
error: empty regex pattern
|
1 | Q = (identifier =~ //)
| ^^
");
}

/// A plain anchored regex predicate is accepted as valid.
#[test]
fn valid_regex() {
    assert!(QueryAnalyzed::expect(r"Q = (identifier =~ /^test_/)").is_valid());
}

/// A string equality predicate is accepted as valid.
#[test]
fn valid_string_predicate() {
    assert!(QueryAnalyzed::expect(r#"Q = (identifier == "foo")"#).is_valid());
}
Loading