plotnik_compiler/parser/
ast.rs

1//! Typed AST wrappers over CST nodes.
2//!
3//! Each struct wraps a `SyntaxNode` and provides typed accessors.
4//! Cast is infallible for correct `SyntaxKind` - validation happens elsewhere.
5//!
6//! ## String Lifetime Limitation
7//!
8//! `SyntaxToken::text()` returns `&str` tied to the token's lifetime, not to the
9//! source `&'q str`. This is a rowan design: tokens store interned strings, not
10//! spans into the original source.
11//!
12//! When building data structures that need source-lifetime strings (e.g.,
13//! `SymbolTable<'q>`), use [`token_src`] instead of `token.text()`.
14
15use super::cst::{SyntaxKind, SyntaxNode, SyntaxToken};
16use rowan::TextRange;
17
18/// Extracts token text with source lifetime.
19///
20/// Use this instead of `token.text()` when you need `&'q str`.
21pub fn token_src<'q>(token: &SyntaxToken, source: &'q str) -> &'q str {
22    let range = token.text_range();
23    &source[range.start().into()..range.end().into()]
24}
25
// Declares a newtype `$name` wrapping a `SyntaxNode` whose kind is
// `SyntaxKind::$kind`, together with the cast/accessor methods shared by
// every AST wrapper.
macro_rules! ast_node {
    ($name:ident, $kind:ident) => {
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        pub struct $name(SyntaxNode);

        impl $name {
            /// Returns true when `kind` is the CST kind this wrapper represents.
            pub fn can_cast(kind: SyntaxKind) -> bool {
                matches!(kind, SyntaxKind::$kind)
            }

            /// Wraps `node` if it has the expected kind, otherwise returns `None`.
            pub fn cast(node: SyntaxNode) -> Option<Self> {
                if Self::can_cast(node.kind()) {
                    Some(Self(node))
                } else {
                    None
                }
            }

            /// Borrows the underlying CST node.
            pub fn as_cst(&self) -> &SyntaxNode {
                let Self(node) = self;
                node
            }

            /// Returns the text range of the underlying CST node.
            pub fn text_range(&self) -> TextRange {
                self.as_cst().text_range()
            }
        }
    };
}
50
// Generates the `Expr` enum with one variant per listed wrapper type
// (variant name == wrapped type name), plus dispatching accessors.
macro_rules! define_expr {
    ($($variant:ident),+ $(,)?) => {
        /// Expression: any pattern that can appear in the tree.
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        pub enum Expr {
            $($variant($variant)),+
        }

        impl Expr {
            /// Wraps `node` in the first variant whose `can_cast` accepts its
            /// kind; returns `None` when no variant matches.
            pub fn cast(node: SyntaxNode) -> Option<Self> {
                let kind = node.kind();
                $(
                    if $variant::can_cast(kind) {
                        return Some(Self::$variant($variant(node)));
                    }
                )+
                None
            }

            /// Borrows the CST node held by whichever variant is active.
            pub fn as_cst(&self) -> &SyntaxNode {
                match self {
                    $(Self::$variant(inner) => inner.as_cst()),+
                }
            }

            /// Returns the text range of whichever variant is active.
            pub fn text_range(&self) -> TextRange {
                match self {
                    $(Self::$variant(inner) => inner.text_range()),+
                }
            }
        }
    };
}
76
77impl Expr {
78    /// Returns direct child expressions.
79    pub fn children(&self) -> Vec<Expr> {
80        match self {
81            Expr::NamedNode(n) => n.children().collect(),
82            Expr::SeqExpr(s) => s.children().collect(),
83            Expr::CapturedExpr(c) => c.inner().into_iter().collect(),
84            Expr::QuantifiedExpr(q) => q.inner().into_iter().collect(),
85            Expr::FieldExpr(f) => f.value().into_iter().collect(),
86            Expr::AltExpr(a) => a.branches().filter_map(|b| b.body()).collect(),
87            Expr::Ref(_) | Expr::AnonymousNode(_) => vec![],
88        }
89    }
90}
91
92ast_node!(Root, Root);
93ast_node!(Def, Def);
94ast_node!(NamedNode, Tree);
95ast_node!(Ref, Ref);
96ast_node!(AltExpr, Alt);
97ast_node!(Branch, Branch);
98ast_node!(SeqExpr, Seq);
99ast_node!(CapturedExpr, Capture);
100ast_node!(Type, Type);
101ast_node!(QuantifiedExpr, Quantifier);
102ast_node!(FieldExpr, Field);
103ast_node!(NegatedField, NegatedField);
104ast_node!(Anchor, Anchor);
105ast_node!(NodePredicate, NodePredicate);
106ast_node!(RegexLiteral, Regex);
107
108/// Either an expression or an anchor in a sequence.
109#[derive(Debug, Clone, PartialEq, Eq, Hash)]
110pub enum SeqItem {
111    Expr(Expr),
112    Anchor(Anchor),
113}
114
115impl SeqItem {
116    pub fn cast(node: SyntaxNode) -> Option<Self> {
117        if let Some(expr) = Expr::cast(node.clone()) {
118            return Some(SeqItem::Expr(expr));
119        }
120        if let Some(anchor) = Anchor::cast(node) {
121            return Some(SeqItem::Anchor(anchor));
122        }
123        None
124    }
125
126    pub fn as_anchor(&self) -> Option<&Anchor> {
127        match self {
128            SeqItem::Anchor(a) => Some(a),
129            _ => None,
130        }
131    }
132
133    pub fn as_expr(&self) -> Option<&Expr> {
134        match self {
135            SeqItem::Expr(e) => Some(e),
136            _ => None,
137        }
138    }
139}
140
141/// Anonymous node: string literal (`"+"`) or wildcard (`_`).
142/// Maps from CST `Str` or `Wildcard`.
143#[derive(Debug, Clone, PartialEq, Eq, Hash)]
144pub struct AnonymousNode(SyntaxNode);
145
146impl AnonymousNode {
147    pub fn cast(node: SyntaxNode) -> Option<Self> {
148        Self::can_cast(node.kind()).then(|| Self(node))
149    }
150
151    pub fn can_cast(kind: SyntaxKind) -> bool {
152        matches!(kind, SyntaxKind::Str | SyntaxKind::Wildcard)
153    }
154
155    pub fn as_cst(&self) -> &SyntaxNode {
156        &self.0
157    }
158
159    pub fn text_range(&self) -> TextRange {
160        self.0.text_range()
161    }
162
163    /// Returns the string value if this is a literal, `None` if wildcard.
164    pub fn value(&self) -> Option<SyntaxToken> {
165        if self.0.kind() == SyntaxKind::Wildcard {
166            return None;
167        }
168        self.0
169            .children_with_tokens()
170            .filter_map(|it| it.into_token())
171            .find(|t| t.kind() == SyntaxKind::StrVal)
172    }
173
174    /// Returns true if this is the "any" wildcard (`_`).
175    pub fn is_any(&self) -> bool {
176        self.0.kind() == SyntaxKind::Wildcard
177    }
178}
179
/// Labeling style of an alternation's branches.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AltKind {
    /// Every branch carries a label: `[A: expr1 B: expr2]`.
    Tagged,
    /// No branch carries a label: `[expr1 expr2]`.
    Untagged,
    /// Labeled and unlabeled branches appear together (invalid).
    Mixed,
}
190
191// Re-export PredicateOp from bytecode crate
192pub use plotnik_bytecode::PredicateOp;
193
194/// Convert SyntaxKind to PredicateOp.
195pub fn predicate_op_from_syntax_kind(kind: SyntaxKind) -> Option<PredicateOp> {
196    match kind {
197        SyntaxKind::OpEq => Some(PredicateOp::Eq),
198        SyntaxKind::OpNe => Some(PredicateOp::Ne),
199        SyntaxKind::OpStartsWith => Some(PredicateOp::StartsWith),
200        SyntaxKind::OpEndsWith => Some(PredicateOp::EndsWith),
201        SyntaxKind::OpContains => Some(PredicateOp::Contains),
202        SyntaxKind::OpRegexMatch => Some(PredicateOp::RegexMatch),
203        SyntaxKind::OpRegexNoMatch => Some(PredicateOp::RegexNoMatch),
204        _ => None,
205    }
206}
207
/// Right-hand side of a node predicate, borrowed from the query source.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PredicateValue<'q> {
    /// A string literal value.
    String(&'q str),
    /// A regex pattern (the content between the `/` delimiters).
    Regex(&'q str),
}
216
217define_expr!(
218    NamedNode,
219    Ref,
220    AnonymousNode,
221    AltExpr,
222    SeqExpr,
223    CapturedExpr,
224    QuantifiedExpr,
225    FieldExpr,
226);
227
228impl Root {
229    pub fn defs(&self) -> impl Iterator<Item = Def> + '_ {
230        self.0.children().filter_map(Def::cast)
231    }
232
233    pub fn exprs(&self) -> impl Iterator<Item = Expr> + '_ {
234        self.0.children().filter_map(Expr::cast)
235    }
236}
237
238impl Def {
239    pub fn name(&self) -> Option<SyntaxToken> {
240        self.0
241            .children_with_tokens()
242            .filter_map(|it| it.into_token())
243            .find(|t| t.kind() == SyntaxKind::Id)
244    }
245
246    pub fn body(&self) -> Option<Expr> {
247        self.0.children().find_map(Expr::cast)
248    }
249}
250
251impl NamedNode {
252    pub fn node_type(&self) -> Option<SyntaxToken> {
253        self.0
254            .children_with_tokens()
255            .filter_map(|it| it.into_token())
256            .find(|t| {
257                matches!(
258                    t.kind(),
259                    SyntaxKind::Id
260                        | SyntaxKind::Underscore
261                        | SyntaxKind::KwError
262                        | SyntaxKind::KwMissing
263                )
264            })
265    }
266
267    /// Returns true if the node type is wildcard (`_`), matching any named node.
268    pub fn is_any(&self) -> bool {
269        self.node_type()
270            .map(|t| t.kind() == SyntaxKind::Underscore)
271            .unwrap_or(false)
272    }
273
274    /// Returns true if this is a MISSING node: `(MISSING ...)`.
275    pub fn is_missing(&self) -> bool {
276        self.node_type()
277            .map(|t| t.kind() == SyntaxKind::KwMissing)
278            .unwrap_or(false)
279    }
280
281    /// For MISSING nodes, returns the inner type constraint if present.
282    ///
283    /// `(MISSING identifier)` → Some("identifier")
284    /// `(MISSING ";")` → Some(";")
285    /// `(MISSING)` → None
286    pub fn missing_constraint(&self) -> Option<SyntaxToken> {
287        if !self.is_missing() {
288            return None;
289        }
290        // After KwMissing, look for Id or StrVal token
291        let mut found_missing = false;
292        for child in self.0.children_with_tokens() {
293            if let Some(token) = child.into_token() {
294                if token.kind() == SyntaxKind::KwMissing {
295                    found_missing = true;
296                } else if found_missing
297                    && matches!(token.kind(), SyntaxKind::Id | SyntaxKind::StrVal)
298                {
299                    return Some(token);
300                }
301            }
302        }
303        None
304    }
305
306    pub fn children(&self) -> impl Iterator<Item = Expr> + '_ {
307        self.0.children().filter_map(Expr::cast)
308    }
309
310    /// Returns all anchors in this node.
311    pub fn anchors(&self) -> impl Iterator<Item = Anchor> + '_ {
312        self.0.children().filter_map(Anchor::cast)
313    }
314
315    /// Returns children interleaved with anchors, preserving order.
316    pub fn items(&self) -> impl Iterator<Item = SeqItem> + '_ {
317        self.0.children().filter_map(SeqItem::cast)
318    }
319
320    /// Returns the predicate if present: `(identifier == "foo")`.
321    pub fn predicate(&self) -> Option<NodePredicate> {
322        self.0.children().find_map(NodePredicate::cast)
323    }
324}
325
326impl NodePredicate {
327    /// Returns the operator token.
328    pub fn operator_token(&self) -> Option<SyntaxToken> {
329        self.0
330            .children_with_tokens()
331            .filter_map(|it| it.into_token())
332            .find(|t| predicate_op_from_syntax_kind(t.kind()).is_some())
333    }
334
335    /// Returns the operator kind.
336    pub fn operator(&self) -> Option<PredicateOp> {
337        self.operator_token()
338            .and_then(|t| predicate_op_from_syntax_kind(t.kind()))
339    }
340
341    /// Returns the string value if the predicate uses a string.
342    pub fn string_value(&self) -> Option<SyntaxToken> {
343        self.0
344            .children_with_tokens()
345            .filter_map(|it| it.into_token())
346            .find(|t| t.kind() == SyntaxKind::StrVal)
347    }
348
349    /// Returns the regex literal if the predicate uses a regex.
350    pub fn regex(&self) -> Option<RegexLiteral> {
351        self.0.children().find_map(RegexLiteral::cast)
352    }
353
354    /// Returns the predicate value (string or regex pattern).
355    pub fn value<'q>(&self, source: &'q str) -> Option<PredicateValue<'q>> {
356        if let Some(str_token) = self.string_value() {
357            return Some(PredicateValue::String(token_src(&str_token, source)));
358        }
359        if let Some(regex) = self.regex() {
360            return Some(PredicateValue::Regex(regex.pattern(source)));
361        }
362        None
363    }
364}
365
366impl RegexLiteral {
367    /// Returns the regex pattern content (between the `/` delimiters).
368    pub fn pattern<'q>(&self, source: &'q str) -> &'q str {
369        let range = self.0.text_range();
370        let text = &source[usize::from(range.start())..usize::from(range.end())];
371
372        let Some(without_prefix) = text.strip_prefix('/') else {
373            return text;
374        };
375        without_prefix.strip_suffix('/').unwrap_or(without_prefix)
376    }
377}
378
379impl Ref {
380    pub fn name(&self) -> Option<SyntaxToken> {
381        self.0
382            .children_with_tokens()
383            .filter_map(|it| it.into_token())
384            .find(|t| t.kind() == SyntaxKind::Id)
385    }
386}
387
388impl AltExpr {
389    pub fn kind(&self) -> AltKind {
390        let mut tagged = false;
391        let mut untagged = false;
392
393        for child in self.0.children().filter(|c| c.kind() == SyntaxKind::Branch) {
394            let has_label = child
395                .children_with_tokens()
396                .filter_map(|it| it.into_token())
397                .any(|t| t.kind() == SyntaxKind::Id);
398
399            if has_label {
400                tagged = true;
401            } else {
402                untagged = true;
403            }
404        }
405
406        match (tagged, untagged) {
407            (true, true) => AltKind::Mixed,
408            (true, false) => AltKind::Tagged,
409            _ => AltKind::Untagged,
410        }
411    }
412
413    pub fn branches(&self) -> impl Iterator<Item = Branch> + '_ {
414        self.0.children().filter_map(Branch::cast)
415    }
416
417    pub fn exprs(&self) -> impl Iterator<Item = Expr> + '_ {
418        self.0.children().filter_map(Expr::cast)
419    }
420}
421
422impl Branch {
423    pub fn label(&self) -> Option<SyntaxToken> {
424        self.0
425            .children_with_tokens()
426            .filter_map(|it| it.into_token())
427            .find(|t| t.kind() == SyntaxKind::Id)
428    }
429
430    pub fn body(&self) -> Option<Expr> {
431        self.0.children().find_map(Expr::cast)
432    }
433}
434
435impl SeqExpr {
436    pub fn children(&self) -> impl Iterator<Item = Expr> + '_ {
437        self.0.children().filter_map(Expr::cast)
438    }
439
440    /// Returns all anchors in this sequence.
441    pub fn anchors(&self) -> impl Iterator<Item = Anchor> + '_ {
442        self.0.children().filter_map(Anchor::cast)
443    }
444
445    /// Returns children interleaved with anchors, preserving order.
446    pub fn items(&self) -> impl Iterator<Item = SeqItem> + '_ {
447        self.0.children().filter_map(SeqItem::cast)
448    }
449}
450
451impl CapturedExpr {
452    /// Returns the capture token (@name or @_name).
453    /// The token text includes the @ prefix.
454    pub fn name(&self) -> Option<SyntaxToken> {
455        self.0
456            .children_with_tokens()
457            .filter_map(|it| it.into_token())
458            .find(|t| {
459                matches!(
460                    t.kind(),
461                    SyntaxKind::CaptureToken | SyntaxKind::SuppressiveCapture
462                )
463            })
464    }
465
466    /// Returns true if this is a suppressive capture (@_ or @_name).
467    /// Suppressive captures match structurally but don't contribute to output.
468    pub fn is_suppressive(&self) -> bool {
469        self.0
470            .children_with_tokens()
471            .filter_map(|it| it.into_token())
472            .any(|t| t.kind() == SyntaxKind::SuppressiveCapture)
473    }
474
475    pub fn inner(&self) -> Option<Expr> {
476        self.0.children().find_map(Expr::cast)
477    }
478
479    pub fn type_annotation(&self) -> Option<Type> {
480        self.0.children().find_map(Type::cast)
481    }
482
483    /// Returns true if this capture has a `:: string` type annotation.
484    pub fn has_string_annotation(&self) -> bool {
485        self.type_annotation()
486            .is_some_and(|t| t.name().is_some_and(|n| n.text() == "string"))
487    }
488}
489
490impl Type {
491    pub fn name(&self) -> Option<SyntaxToken> {
492        self.0
493            .children_with_tokens()
494            .filter_map(|it| it.into_token())
495            .find(|t| t.kind() == SyntaxKind::Id)
496    }
497}
498
499impl QuantifiedExpr {
500    pub fn inner(&self) -> Option<Expr> {
501        self.0.children().find_map(Expr::cast)
502    }
503
504    pub fn operator(&self) -> Option<SyntaxToken> {
505        self.0
506            .children_with_tokens()
507            .filter_map(|it| it.into_token())
508            .find(|t| {
509                matches!(
510                    t.kind(),
511                    SyntaxKind::Star
512                        | SyntaxKind::Plus
513                        | SyntaxKind::Question
514                        | SyntaxKind::StarQuestion
515                        | SyntaxKind::PlusQuestion
516                        | SyntaxKind::QuestionQuestion
517                )
518            })
519    }
520
521    /// Returns true if quantifier allows zero matches (?, *, ??, *?).
522    pub fn is_optional(&self) -> bool {
523        self.operator()
524            .map(|op| {
525                matches!(
526                    op.kind(),
527                    SyntaxKind::Question
528                        | SyntaxKind::Star
529                        | SyntaxKind::QuestionQuestion
530                        | SyntaxKind::StarQuestion
531                )
532            })
533            .unwrap_or(false)
534    }
535}
536
537impl FieldExpr {
538    pub fn name(&self) -> Option<SyntaxToken> {
539        self.0
540            .children_with_tokens()
541            .filter_map(|it| it.into_token())
542            .find(|t| t.kind() == SyntaxKind::Id)
543    }
544
545    pub fn value(&self) -> Option<Expr> {
546        self.0.children().find_map(Expr::cast)
547    }
548}
549
550impl NegatedField {
551    pub fn name(&self) -> Option<SyntaxToken> {
552        self.0
553            .children_with_tokens()
554            .filter_map(|it| it.into_token())
555            .find(|t| t.kind() == SyntaxKind::Id)
556    }
557}
558
559/// Checks if expression is a truly empty scope (sequence/alternation with no children).
560/// Used to distinguish `{ } @x` (empty struct) from `{(expr) @_} @x` (Node capture).
561pub fn is_truly_empty_scope(inner: &Expr) -> bool {
562    match inner {
563        Expr::SeqExpr(seq) => seq.children().next().is_none(),
564        Expr::AltExpr(alt) => alt.branches().next().is_none(),
565        Expr::QuantifiedExpr(q) => q.inner().is_some_and(|i| is_truly_empty_scope(&i)),
566        _ => false,
567    }
568}