plotnik_compiler/compile/
capture.rs

1//! Capture effects handling for query compilation.
2//!
3//! Manages the construction and propagation of capture effects (Node/Text + Set)
4//! through the compilation pipeline.
5
6use std::collections::HashSet;
7
8use crate::analyze::type_check::{TypeContext, TypeId, TypeShape};
9use crate::bytecode::EffectIR;
10use crate::parser::ast::{self, Expr};
11use plotnik_bytecode::EffectOpcode;
12
13use super::Compiler;
14use super::navigation::{inner_creates_scope, is_star_or_plus_quantifier, is_truly_empty_scope};
15
16/// Capture effects to attach to match instructions.
17///
18/// Instead of emitting separate epsilon transitions for wrapper effects,
19/// these effects are propagated through the compilation chain and attached
20/// directly to match instructions.
21///
22/// For sequences `{a b c}`:
23/// - `pre` effects go on the first item (entry)
24/// - `post` effects go on the last item (exit)
25///
26/// For tagged alternations `[A: body]`:
27/// - `pre` contains `Enum(variant)` for branch entry
28/// - `post` contains `EndEnum` for branch exit
29#[derive(Clone, Default)]
30pub struct CaptureEffects {
31    /// Effects to place as pre_effects on the entry instruction.
32    /// Used for: Enum(variant) in tagged alternations.
33    pub pre: Vec<EffectIR>,
34    /// Effects to place as post_effects on the exit instruction.
35    /// Typically: [Node/Text, Set(member)], [Push], or [EndEnum].
36    pub post: Vec<EffectIR>,
37}
38
39impl CaptureEffects {
40    /// Create with explicit pre and post effects.
41    pub fn new(pre: Vec<EffectIR>, post: Vec<EffectIR>) -> Self {
42        Self { pre, post }
43    }
44
45    /// Create with only pre effects.
46    pub fn new_pre(pre: Vec<EffectIR>) -> Self {
47        Self { pre, post: vec![] }
48    }
49
50    /// Create with only post effects.
51    pub fn new_post(post: Vec<EffectIR>) -> Self {
52        Self { pre: vec![], post }
53    }
54
55    /// Add an inner scope (opens after existing scopes, closes before them).
56    ///
57    /// Use for: Obj/EndObj, Enum/EndEnum, Arr/EndArr, SuppressBegin/SuppressEnd
58    ///
59    /// Given existing `pre=[A_Open]`, `post=[A_Close]`, adding inner scope B:
60    /// - Result: `pre=[A_Open, B_Open]`, `post=[B_Close, A_Close]`
61    /// - Execution: A opens -> B opens -> match -> B closes -> A closes
62    pub fn nest_scope(mut self, open: EffectIR, close: EffectIR) -> Self {
63        assert!(
64            matches!(
65                open.opcode,
66                EffectOpcode::Obj
67                    | EffectOpcode::Enum
68                    | EffectOpcode::Arr
69                    | EffectOpcode::SuppressBegin
70            ),
71            "nest_scope expects scope-opening effect, got {:?}",
72            open.opcode
73        );
74        assert!(
75            matches!(
76                close.opcode,
77                EffectOpcode::EndObj
78                    | EffectOpcode::EndEnum
79                    | EffectOpcode::EndArr
80                    | EffectOpcode::SuppressEnd
81            ),
82            "nest_scope expects scope-closing effect, got {:?}",
83            close.opcode
84        );
85        self.pre.push(open);
86        self.post.insert(0, close);
87        self
88    }
89
90    /// Add pre-match value effects (run after all scopes open).
91    ///
92    /// Use for: Null+Set injection in untagged alternations
93    ///
94    /// Given `pre=[Scope_Open]`, adding value effects:
95    /// - Result: `pre=[Scope_Open, Value1, Value2]`
96    pub fn with_pre_values(mut self, effects: Vec<EffectIR>) -> Self {
97        self.pre.extend(effects);
98        self
99    }
100
101    /// Add post-match value effects (run before any scope closes).
102    ///
103    /// Use for: Node/Text+Set capture effects, Push for arrays
104    ///
105    /// Given `post=[Scope_Close]`, adding value effects:
106    /// - Result: `post=[Value1, Value2, Scope_Close]`
107    pub fn with_post_values(mut self, effects: Vec<EffectIR>) -> Self {
108        self.post.splice(0..0, effects);
109        self
110    }
111}
112
113impl Compiler<'_> {
114    /// Build capture effects (Node/Text + Set) based on capture type.
115    pub(super) fn build_capture_effects(
116        &self,
117        cap: &ast::CapturedExpr,
118        inner: Option<&Expr>,
119    ) -> Vec<EffectIR> {
120        let mut effects = Vec::with_capacity(2);
121
122        // Skip Node/Text when the value comes from somewhere other than matched_node:
123        // 1. Refs returning structured types (Call leaves result pending)
124        // 2. Scope-creating expressions (Seq/Alt) producing structured types (EndObj/EndEnum)
125        // 3. Array captures (EndArr produces value)
126        let is_structured_ref = inner.is_some_and(|i| self.is_ref_returning_structured(i));
127        let is_array = is_star_or_plus_quantifier(inner);
128
129        // Check if inner is a scope-creating expression (SeqExpr/AltExpr) that produces
130        // a structured type (Struct/Enum) or truly empty struct. Named nodes with bubble
131        // captures don't count - they still need Node because we're capturing the matched
132        // node, not the struct.
133        //
134        // For FieldExpr, look through to the value. The parser treats `field: expr @cap` as
135        // `(field: expr) @cap` so that quantifiers work on fields (e.g., `decorator: (x)*`
136        // for repeating fields). This means captures wrap the FieldExpr, but the value
137        // determines whether it produces a structured type. See `parse_expr_no_suffix`.
138        let creates_structured_scope = inner.and_then(unwrap_field_value).is_some_and(|ei| {
139            // Truly empty scopes (like `{ }`) produce empty struct
140            if is_truly_empty_scope(&ei) {
141                return true;
142            }
143            if !inner_creates_scope(&ei) {
144                return false;
145            }
146            let Some(info) = self.ctx.type_ctx.get_term_info(&ei) else {
147                return false;
148            };
149            info.flow
150                .type_id()
151                .and_then(|id| self.ctx.type_ctx.get_type(id))
152                .is_some_and(|shape| matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_)))
153        });
154
155        if !is_structured_ref && !creates_structured_scope && !is_array {
156            let effect = if cap.has_string_annotation() {
157                EffectIR::text()
158            } else {
159                EffectIR::node()
160            };
161            effects.push(effect);
162        }
163
164        // Add Set effect if we have a capture name.
165        // Always look up in the current scope - bubble captures don't create new scopes,
166        // so all fields (including nested bubble captures) reference the same root struct.
167        if let Some(name_token) = cap.name() {
168            let capture_name = &name_token.text()[1..]; // Strip @ prefix
169            let member_ref = self.lookup_member_in_scope(capture_name);
170            if let Some(member_ref) = member_ref {
171                effects.push(EffectIR::with_member(EffectOpcode::Set, member_ref));
172            }
173        }
174
175        effects
176    }
177
178    /// Check if a quantifier body needs Node effect before Push.
179    ///
180    /// For scalar array elements (Node/String types), we need [Node/Text, Push]
181    /// to capture the matched node value.
182    /// For structured elements (Struct/Enum), EndObj/EndEnum provides the value.
183    /// For refs returning structured types, Call provides the value.
184    pub(super) fn quantifier_needs_node_for_push(&self, expr: &Expr) -> bool {
185        let Expr::QuantifiedExpr(quant) = expr else {
186            return true;
187        };
188        let Some(inner) = quant.inner() else {
189            return true;
190        };
191
192        // Refs returning structured types don't need Node
193        if self.is_ref_returning_structured(&inner) {
194            return false;
195        }
196
197        // Check the actual inferred type, not syntax
198        let Some(info) = self.ctx.type_ctx.get_term_info(&inner) else {
199            return true;
200        };
201
202        // If type is Struct or Enum, EndObj/EndEnum produces the value
203        // Otherwise (Node, String, Void, etc.), we need Node effect
204        !info
205            .flow
206            .type_id()
207            .and_then(|id| self.ctx.type_ctx.get_type(id))
208            .is_some_and(|shape| matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_)))
209    }
210
211    /// Check if expr is (or wraps) a ref returning a structured type.
212    ///
213    /// For such refs, we skip the Node effect in captures - the Call leaves
214    /// the structured result (Enum/Struct/Array) pending for Set to consume.
215    pub(super) fn is_ref_returning_structured(&self, expr: &Expr) -> bool {
216        match expr {
217            Expr::Ref(r) => self.ref_returns_structured(r),
218            Expr::QuantifiedExpr(q) => q
219                .inner()
220                .is_some_and(|i| self.is_ref_returning_structured(&i)),
221            Expr::CapturedExpr(c) => c
222                .inner()
223                .is_some_and(|i| self.is_ref_returning_structured(&i)),
224            Expr::FieldExpr(f) => f
225                .value()
226                .is_some_and(|v| self.is_ref_returning_structured(&v)),
227            _ => false,
228        }
229    }
230
231    /// Check if a Ref points to a definition returning a structured type.
232    ///
233    /// All refs now use Call/Return. If the definition returns a structured type
234    /// (Enum/Struct/Array), Return leaves that result pending for Set to consume.
235    /// In this case, we skip emitting Node/Text effects in captures.
236    fn ref_returns_structured(&self, r: &ast::Ref) -> bool {
237        r.name()
238            .and_then(|name| self.ctx.type_ctx.get_def_id(self.ctx.interner, name.text()))
239            .and_then(|def_id| self.ctx.type_ctx.get_def_type(def_id))
240            .and_then(|def_type| self.ctx.type_ctx.get_type(def_type))
241            .is_some_and(|shape| {
242                matches!(
243                    shape,
244                    TypeShape::Struct(_) | TypeShape::Enum(_) | TypeShape::Array { .. }
245                )
246            })
247    }
248
249    /// Collect all capture names from an expression recursively.
250    pub(super) fn collect_captures(expr: &Expr) -> HashSet<String> {
251        fn collect(expr: &Expr, names: &mut HashSet<String>) {
252            if let Expr::CapturedExpr(cap) = expr
253                && let Some(name) = cap.name()
254            {
255                names.insert(name.text()[1..].to_string()); // Strip @ prefix
256            }
257            for child in expr.children() {
258                collect(&child, names);
259            }
260        }
261        let mut names = HashSet::new();
262        collect(expr, &mut names);
263        names
264    }
265}
266
267/// Unwrap FieldExpr to get its value, pass through other expressions.
268///
269/// Used when checking properties of a captured expression - captures on fields
270/// like `field: [A: B:] @cap` wrap the FieldExpr, but we need to inspect the value.
271fn unwrap_field_value(expr: &Expr) -> Option<Expr> {
272    match expr {
273        Expr::FieldExpr(f) => f.value(),
274        other => Some(other.clone()),
275    }
276}
277
278/// Check if inner needs struct wrapper for array iterations.
279///
280/// Returns true when the inner expression produces a Struct type (bubbling fields).
281/// This includes:
282/// - Sequences/alternations with captures: `{(a) @x (b) @y}*`
283/// - Named nodes with bubble captures: `(node (child) @x)*`
284///
285/// Enums use Enum/EndEnum instead (handled separately).
286pub fn check_needs_struct_wrapper(inner: &Expr, type_ctx: &TypeContext) -> bool {
287    let Some(info) = type_ctx.get_term_info(inner) else {
288        return false;
289    };
290
291    // Must be a bubble (fields flow to parent scope)
292    if !info.flow.is_bubble() {
293        return false;
294    }
295
296    // Check the actual type - if it's a Struct, we need Obj/EndObj wrapper
297    info.flow
298        .type_id()
299        .and_then(|id| type_ctx.get_type(id))
300        .is_some_and(|shape| matches!(shape, TypeShape::Struct(_)))
301}
302
303/// Get row type ID for array element scoping.
304pub fn get_row_type_id(inner: &Expr, type_ctx: &TypeContext) -> Option<TypeId> {
305    type_ctx
306        .get_term_info(inner)
307        .and_then(|info| info.flow.type_id())
308}