plotnik_compiler/analyze/validation/
predicates.rs

//! Predicate validation.
//!
//! Validates regex patterns in predicates. Empty patterns and patterns that
//! fail to parse are reported, as are these unsupported features:
//! - Backreferences (`\1`)
//! - Lookahead/lookbehind (`(?=...)`, `(?!...)`, etc.)
//! - Named captures (`(?P<name>...)`)
7
8use regex_syntax::ast::{self, Ast, GroupKind, Visitor as RegexVisitor, visit};
9use rowan::TextRange;
10
11use crate::SourceId;
12use crate::analyze::visitor::{Visitor, walk_named_node};
13use crate::diagnostics::{DiagnosticKind, Diagnostics};
14use crate::parser::{NamedNode, Root};
15
16pub fn validate_predicates(source_id: SourceId, source: &str, ast: &Root, diag: &mut Diagnostics) {
17    let mut validator = PredicateValidator {
18        diag,
19        source_id,
20        source,
21    };
22    validator.visit(ast);
23}
24
25struct PredicateValidator<'q, 'd> {
26    diag: &'d mut Diagnostics,
27    source_id: SourceId,
28    source: &'q str,
29}
30
31impl Visitor for PredicateValidator<'_, '_> {
32    fn visit_named_node(&mut self, node: &NamedNode) {
33        // Validate regex syntax if this is a regex predicate
34        if let Some(pred) = node.predicate()
35            && let Some(op) = pred.operator()
36            && op.is_regex_op()
37            && let Some(regex) = pred.regex()
38        {
39            self.validate_regex(regex.pattern(self.source), regex.text_range());
40        }
41        walk_named_node(self, node);
42    }
43}
44
45impl PredicateValidator<'_, '_> {
46    fn validate_regex(&mut self, pattern: &str, regex_range: TextRange) {
47        // Reject empty regex patterns
48        if pattern.is_empty() {
49            self.diag
50                .report(self.source_id, DiagnosticKind::EmptyRegex, regex_range)
51                .emit();
52            return;
53        }
54
55        // Parse with octal disabled so \1-\9 are backreferences, not octal
56        let parser_result = ast::parse::ParserBuilder::new()
57            .octal(false)
58            .build()
59            .parse(pattern);
60
61        let parsed_ast = match parser_result {
62            Ok(ast) => ast,
63            Err(e) => {
64                let span = self.map_regex_span(e.span(), regex_range);
65                let report = match e.kind() {
66                    ast::ErrorKind::UnsupportedBackreference => {
67                        self.diag
68                            .report(self.source_id, DiagnosticKind::RegexBackreference, span)
69                    }
70                    ast::ErrorKind::UnsupportedLookAround => {
71                        // Skip the opening `(` - point at `?=` / `?!` / `?<=` / `?<!`
72                        use rowan::TextSize;
73                        let adjusted =
74                            TextRange::new(span.start() + TextSize::from(1u32), span.end());
75                        self.diag
76                            .report(self.source_id, DiagnosticKind::RegexLookaround, adjusted)
77                    }
78                    _ => self
79                        .diag
80                        .report(self.source_id, DiagnosticKind::RegexSyntaxError, span)
81                        .message(format!("{}", e.kind())),
82                };
83                report.emit();
84                return;
85            }
86        };
87
88        // Walk AST to find named captures
89        let detector = NamedCaptureDetector {
90            named_captures: Vec::new(),
91        };
92        let detector = visit(&parsed_ast, detector).unwrap();
93
94        for capture_span in detector.named_captures {
95            let span = self.map_regex_span(&capture_span, regex_range);
96            self.diag
97                .report(self.source_id, DiagnosticKind::RegexNamedCapture, span)
98                .emit();
99        }
100    }
101
102    /// Map a span within the regex pattern to a span in the query source.
103    fn map_regex_span(&self, regex_span: &ast::Span, regex_range: TextRange) -> TextRange {
104        // regex_range includes the `/` delimiters, so content starts at +1
105        let content_start = u32::from(regex_range.start()) + 1;
106        let start = content_start + regex_span.start.offset as u32;
107        let end = content_start + regex_span.end.offset as u32;
108        TextRange::new(start.into(), end.into())
109    }
110}
111
112struct NamedCaptureDetector {
113    named_captures: Vec<ast::Span>,
114}
115
116impl RegexVisitor for NamedCaptureDetector {
117    type Output = Self;
118    type Err = std::convert::Infallible;
119
120    fn finish(self) -> Result<Self::Output, Self::Err> {
121        Ok(self)
122    }
123
124    fn visit_pre(&mut self, ast: &Ast) -> Result<(), Self::Err> {
125        if let Ast::Group(group) = ast
126            && let GroupKind::CaptureName { name, .. } = &group.kind
127        {
128            // Span for `?P<name>` (skip opening paren, include closing `>`)
129            let start = ast::Position::new(
130                group.span.start.offset + 1,
131                group.span.start.line,
132                group.span.start.column + 1,
133            );
134            let end = ast::Position::new(
135                name.span.end.offset + 1,
136                name.span.end.line,
137                name.span.end.column + 1,
138            );
139            self.named_captures.push(ast::Span::new(start, end));
140        }
141        Ok(())
142    }
143}