plotnik_compiler/analyze/validation/
predicates.rs

1//! Predicate validation.
2//!
3//! Validates regex patterns in predicates for unsupported features:
4//! - Backreferences (`\1`)
5//! - Lookahead/lookbehind (`(?=...)`, `(?!...)`, etc.)
6//! - Named captures (`(?P<name>...)`)
7
8use regex_syntax::ast::{self, visit, Ast, GroupKind, Visitor as RegexVisitor};
9use rowan::TextRange;
10
11use crate::SourceId;
12use crate::analyze::visitor::{Visitor, walk_named_node};
13use crate::diagnostics::{DiagnosticKind, Diagnostics};
14use crate::parser::{NamedNode, Root};
15
16pub fn validate_predicates(
17    source_id: SourceId,
18    source: &str,
19    ast: &Root,
20    diag: &mut Diagnostics,
21) {
22    let mut validator = PredicateValidator {
23        diag,
24        source_id,
25        source,
26    };
27    validator.visit(ast);
28}
29
30struct PredicateValidator<'q, 'd> {
31    diag: &'d mut Diagnostics,
32    source_id: SourceId,
33    source: &'q str,
34}
35
36impl Visitor for PredicateValidator<'_, '_> {
37    fn visit_named_node(&mut self, node: &NamedNode) {
38        // Validate regex syntax if this is a regex predicate
39        if let Some(pred) = node.predicate()
40            && let Some(op) = pred.operator()
41            && op.is_regex_op()
42            && let Some(regex) = pred.regex()
43        {
44            self.validate_regex(regex.pattern(self.source), regex.text_range());
45        }
46        walk_named_node(self, node);
47    }
48}
49
50impl PredicateValidator<'_, '_> {
51    fn validate_regex(&mut self, pattern: &str, regex_range: TextRange) {
52        // Reject empty regex patterns
53        if pattern.is_empty() {
54            self.diag
55                .report(self.source_id, DiagnosticKind::EmptyRegex, regex_range)
56                .emit();
57            return;
58        }
59
60        // Parse with octal disabled so \1-\9 are backreferences, not octal
61        let parser_result = ast::parse::ParserBuilder::new()
62            .octal(false)
63            .build()
64            .parse(pattern);
65
66        let parsed_ast = match parser_result {
67            Ok(ast) => ast,
68            Err(e) => {
69                let span = self.map_regex_span(e.span(), regex_range);
70                let report = match e.kind() {
71                    ast::ErrorKind::UnsupportedBackreference => {
72                        self.diag.report(self.source_id, DiagnosticKind::RegexBackreference, span)
73                    }
74                    ast::ErrorKind::UnsupportedLookAround => {
75                        // Skip the opening `(` - point at `?=` / `?!` / `?<=` / `?<!`
76                        use rowan::TextSize;
77                        let adjusted = TextRange::new(span.start() + TextSize::from(1u32), span.end());
78                        self.diag.report(self.source_id, DiagnosticKind::RegexLookaround, adjusted)
79                    }
80                    _ => self
81                        .diag
82                        .report(self.source_id, DiagnosticKind::RegexSyntaxError, span)
83                        .message(format!("{}", e.kind())),
84                };
85                report.emit();
86                return;
87            }
88        };
89
90        // Walk AST to find named captures
91        let detector = NamedCaptureDetector {
92            named_captures: Vec::new(),
93        };
94        let detector = visit(&parsed_ast, detector).unwrap();
95
96        for capture_span in detector.named_captures {
97            let span = self.map_regex_span(&capture_span, regex_range);
98            self.diag
99                .report(self.source_id, DiagnosticKind::RegexNamedCapture, span)
100                .emit();
101        }
102    }
103
104    /// Map a span within the regex pattern to a span in the query source.
105    fn map_regex_span(&self, regex_span: &ast::Span, regex_range: TextRange) -> TextRange {
106        // regex_range includes the `/` delimiters, so content starts at +1
107        let content_start = u32::from(regex_range.start()) + 1;
108        let start = content_start + regex_span.start.offset as u32;
109        let end = content_start + regex_span.end.offset as u32;
110        TextRange::new(start.into(), end.into())
111    }
112}
113
114struct NamedCaptureDetector {
115    named_captures: Vec<ast::Span>,
116}
117
118impl RegexVisitor for NamedCaptureDetector {
119    type Output = Self;
120    type Err = std::convert::Infallible;
121
122    fn finish(self) -> Result<Self::Output, Self::Err> {
123        Ok(self)
124    }
125
126    fn visit_pre(&mut self, ast: &Ast) -> Result<(), Self::Err> {
127        if let Ast::Group(group) = ast
128            && let GroupKind::CaptureName { name, .. } = &group.kind
129        {
130            // Span for `?P<name>` (skip opening paren, include closing `>`)
131            let start = ast::Position::new(group.span.start.offset + 1, group.span.start.line, group.span.start.column + 1);
132            let end = ast::Position::new(name.span.end.offset + 1, name.span.end.line, name.span.end.column + 1);
133            self.named_captures.push(ast::Span::new(start, end));
134        }
135        Ok(())
136    }
137}