|
| 1 | +//! Predicate validation. |
| 2 | +//! |
| 3 | +//! Validates regex patterns in predicates for unsupported features: |
| 4 | +//! - Backreferences (`\1`) |
| 5 | +//! - Lookahead/lookbehind (`(?=...)`, `(?!...)`, etc.) |
| 6 | +//! - Named captures (`(?P<name>...)`) |
| 7 | +
|
| 8 | +use regex_syntax::ast::{self, visit, Ast, GroupKind, Visitor as RegexVisitor}; |
| 9 | +use rowan::TextRange; |
| 10 | + |
| 11 | +use crate::SourceId; |
| 12 | +use crate::analyze::visitor::{Visitor, walk_named_node}; |
| 13 | +use crate::diagnostics::{DiagnosticKind, Diagnostics}; |
| 14 | +use crate::parser::{NamedNode, Root}; |
| 15 | + |
| 16 | +pub fn validate_predicates( |
| 17 | + source_id: SourceId, |
| 18 | + source: &str, |
| 19 | + ast: &Root, |
| 20 | + diag: &mut Diagnostics, |
| 21 | +) { |
| 22 | + let mut validator = PredicateValidator { |
| 23 | + diag, |
| 24 | + source_id, |
| 25 | + source, |
| 26 | + }; |
| 27 | + validator.visit(ast); |
| 28 | +} |
| 29 | + |
| 30 | +struct PredicateValidator<'q, 'd> { |
| 31 | + diag: &'d mut Diagnostics, |
| 32 | + source_id: SourceId, |
| 33 | + source: &'q str, |
| 34 | +} |
| 35 | + |
| 36 | +impl Visitor for PredicateValidator<'_, '_> { |
| 37 | + fn visit_named_node(&mut self, node: &NamedNode) { |
| 38 | + if let Some(pred) = node.predicate() { |
| 39 | + // Predicates are only valid on leaf nodes (no children in pattern) |
| 40 | + if node.children().next().is_some() { |
| 41 | + self.diag |
| 42 | + .report( |
| 43 | + self.source_id, |
| 44 | + DiagnosticKind::PredicateOnNonLeaf, |
| 45 | + pred.as_cst().text_range(), |
| 46 | + ) |
| 47 | + .emit(); |
| 48 | + } |
| 49 | + |
| 50 | + // Validate regex syntax if this is a regex predicate |
| 51 | + if let Some(op) = pred.operator() |
| 52 | + && op.is_regex_op() |
| 53 | + && let Some(regex) = pred.regex() |
| 54 | + { |
| 55 | + self.validate_regex(regex.pattern(self.source), regex.text_range()); |
| 56 | + } |
| 57 | + } |
| 58 | + walk_named_node(self, node); |
| 59 | + } |
| 60 | +} |
| 61 | + |
| 62 | +impl PredicateValidator<'_, '_> { |
| 63 | + fn validate_regex(&mut self, pattern: &str, regex_range: TextRange) { |
| 64 | + // Reject empty regex patterns |
| 65 | + if pattern.is_empty() { |
| 66 | + self.diag |
| 67 | + .report(self.source_id, DiagnosticKind::EmptyRegex, regex_range) |
| 68 | + .emit(); |
| 69 | + return; |
| 70 | + } |
| 71 | + |
| 72 | + // Parse with octal disabled so \1-\9 are backreferences, not octal |
| 73 | + let parser_result = ast::parse::ParserBuilder::new() |
| 74 | + .octal(false) |
| 75 | + .build() |
| 76 | + .parse(pattern); |
| 77 | + |
| 78 | + let parsed_ast = match parser_result { |
| 79 | + Ok(ast) => ast, |
| 80 | + Err(e) => { |
| 81 | + let span = self.map_regex_span(e.span(), regex_range); |
| 82 | + let report = match e.kind() { |
| 83 | + ast::ErrorKind::UnsupportedBackreference => { |
| 84 | + self.diag.report(self.source_id, DiagnosticKind::RegexBackreference, span) |
| 85 | + } |
| 86 | + ast::ErrorKind::UnsupportedLookAround => { |
| 87 | + // Skip the opening `(` - point at `?=` / `?!` / `?<=` / `?<!` |
| 88 | + use rowan::TextSize; |
| 89 | + let adjusted = TextRange::new(span.start() + TextSize::from(1u32), span.end()); |
| 90 | + self.diag.report(self.source_id, DiagnosticKind::RegexLookaround, adjusted) |
| 91 | + } |
| 92 | + _ => self |
| 93 | + .diag |
| 94 | + .report(self.source_id, DiagnosticKind::RegexSyntaxError, span) |
| 95 | + .message(format!("{}", e.kind())), |
| 96 | + }; |
| 97 | + report.emit(); |
| 98 | + return; |
| 99 | + } |
| 100 | + }; |
| 101 | + |
| 102 | + // Walk AST to find named captures |
| 103 | + let detector = NamedCaptureDetector { |
| 104 | + named_captures: Vec::new(), |
| 105 | + }; |
| 106 | + let detector = visit(&parsed_ast, detector).unwrap(); |
| 107 | + |
| 108 | + for capture_span in detector.named_captures { |
| 109 | + let span = self.map_regex_span(&capture_span, regex_range); |
| 110 | + self.diag |
| 111 | + .report(self.source_id, DiagnosticKind::RegexNamedCapture, span) |
| 112 | + .emit(); |
| 113 | + } |
| 114 | + } |
| 115 | + |
| 116 | + /// Map a span within the regex pattern to a span in the query source. |
| 117 | + fn map_regex_span(&self, regex_span: &ast::Span, regex_range: TextRange) -> TextRange { |
| 118 | + // regex_range includes the `/` delimiters, so content starts at +1 |
| 119 | + let content_start = u32::from(regex_range.start()) + 1; |
| 120 | + let start = content_start + regex_span.start.offset as u32; |
| 121 | + let end = content_start + regex_span.end.offset as u32; |
| 122 | + TextRange::new(start.into(), end.into()) |
| 123 | + } |
| 124 | +} |
| 125 | + |
| 126 | +struct NamedCaptureDetector { |
| 127 | + named_captures: Vec<ast::Span>, |
| 128 | +} |
| 129 | + |
| 130 | +impl RegexVisitor for NamedCaptureDetector { |
| 131 | + type Output = Self; |
| 132 | + type Err = std::convert::Infallible; |
| 133 | + |
| 134 | + fn finish(self) -> Result<Self::Output, Self::Err> { |
| 135 | + Ok(self) |
| 136 | + } |
| 137 | + |
| 138 | + fn visit_pre(&mut self, ast: &Ast) -> Result<(), Self::Err> { |
| 139 | + if let Ast::Group(group) = ast |
| 140 | + && let GroupKind::CaptureName { name, .. } = &group.kind |
| 141 | + { |
| 142 | + // Span for `?P<name>` (skip opening paren, include closing `>`) |
| 143 | + let start = ast::Position::new(group.span.start.offset + 1, group.span.start.line, group.span.start.column + 1); |
| 144 | + let end = ast::Position::new(name.span.end.offset + 1, name.span.end.line, name.span.end.column + 1); |
| 145 | + self.named_captures.push(ast::Span::new(start, end)); |
| 146 | + } |
| 147 | + Ok(()) |
| 148 | + } |
| 149 | +} |
0 commit comments