plotnik_compiler/compile/capture.rs
1//! Capture effects handling for query compilation.
2//!
3//! Manages the construction and propagation of capture effects (Node/Text + Set)
4//! through the compilation pipeline.
5
6use std::collections::HashSet;
7
8use crate::analyze::type_check::{TypeContext, TypeId, TypeShape};
9use crate::bytecode::EffectIR;
10use crate::parser::ast::{self, Expr};
11use plotnik_bytecode::EffectOpcode;
12
13use super::Compiler;
14use super::navigation::{inner_creates_scope, is_star_or_plus_quantifier, is_truly_empty_scope};
15
16/// Capture effects to attach to match instructions.
17///
18/// Instead of emitting separate epsilon transitions for wrapper effects,
19/// these effects are propagated through the compilation chain and attached
20/// directly to match instructions.
21///
22/// For sequences `{a b c}`:
23/// - `pre` effects go on the first item (entry)
24/// - `post` effects go on the last item (exit)
25///
26/// For tagged alternations `[A: body]`:
27/// - `pre` contains `Enum(variant)` for branch entry
28/// - `post` contains `EndEnum` for branch exit
29#[derive(Clone, Default)]
30pub struct CaptureEffects {
31 /// Effects to place as pre_effects on the entry instruction.
32 /// Used for: Enum(variant) in tagged alternations.
33 pub pre: Vec<EffectIR>,
34 /// Effects to place as post_effects on the exit instruction.
35 /// Typically: [Node/Text, Set(member)], [Push], or [EndEnum].
36 pub post: Vec<EffectIR>,
37}
38
39impl CaptureEffects {
40 /// Create with explicit pre and post effects.
41 pub fn new(pre: Vec<EffectIR>, post: Vec<EffectIR>) -> Self {
42 Self { pre, post }
43 }
44
45 /// Create with only pre effects.
46 pub fn new_pre(pre: Vec<EffectIR>) -> Self {
47 Self { pre, post: vec![] }
48 }
49
50 /// Create with only post effects.
51 pub fn new_post(post: Vec<EffectIR>) -> Self {
52 Self { pre: vec![], post }
53 }
54
55 /// Add an inner scope (opens after existing scopes, closes before them).
56 ///
57 /// Use for: Obj/EndObj, Enum/EndEnum, Arr/EndArr, SuppressBegin/SuppressEnd
58 ///
59 /// Given existing `pre=[A_Open]`, `post=[A_Close]`, adding inner scope B:
60 /// - Result: `pre=[A_Open, B_Open]`, `post=[B_Close, A_Close]`
61 /// - Execution: A opens -> B opens -> match -> B closes -> A closes
62 pub fn nest_scope(mut self, open: EffectIR, close: EffectIR) -> Self {
63 assert!(
64 matches!(
65 open.opcode,
66 EffectOpcode::Obj
67 | EffectOpcode::Enum
68 | EffectOpcode::Arr
69 | EffectOpcode::SuppressBegin
70 ),
71 "nest_scope expects scope-opening effect, got {:?}",
72 open.opcode
73 );
74 assert!(
75 matches!(
76 close.opcode,
77 EffectOpcode::EndObj
78 | EffectOpcode::EndEnum
79 | EffectOpcode::EndArr
80 | EffectOpcode::SuppressEnd
81 ),
82 "nest_scope expects scope-closing effect, got {:?}",
83 close.opcode
84 );
85 self.pre.push(open);
86 self.post.insert(0, close);
87 self
88 }
89
90 /// Add pre-match value effects (run after all scopes open).
91 ///
92 /// Use for: Null+Set injection in untagged alternations
93 ///
94 /// Given `pre=[Scope_Open]`, adding value effects:
95 /// - Result: `pre=[Scope_Open, Value1, Value2]`
96 pub fn with_pre_values(mut self, effects: Vec<EffectIR>) -> Self {
97 self.pre.extend(effects);
98 self
99 }
100
101 /// Add post-match value effects (run before any scope closes).
102 ///
103 /// Use for: Node/Text+Set capture effects, Push for arrays
104 ///
105 /// Given `post=[Scope_Close]`, adding value effects:
106 /// - Result: `post=[Value1, Value2, Scope_Close]`
107 pub fn with_post_values(mut self, effects: Vec<EffectIR>) -> Self {
108 self.post.splice(0..0, effects);
109 self
110 }
111}
112
113impl Compiler<'_> {
114 /// Build capture effects (Node/Text + Set) based on capture type.
115 pub(super) fn build_capture_effects(
116 &self,
117 cap: &ast::CapturedExpr,
118 inner: Option<&Expr>,
119 ) -> Vec<EffectIR> {
120 let mut effects = Vec::with_capacity(2);
121
122 // Skip Node/Text when the value comes from somewhere other than matched_node:
123 // 1. Refs returning structured types (Call leaves result pending)
124 // 2. Scope-creating expressions (Seq/Alt) producing structured types (EndObj/EndEnum)
125 // 3. Array captures (EndArr produces value)
126 let is_structured_ref = inner.is_some_and(|i| self.is_ref_returning_structured(i));
127 let is_array = is_star_or_plus_quantifier(inner);
128
129 // Check if inner is a scope-creating expression (SeqExpr/AltExpr) that produces
130 // a structured type (Struct/Enum) or truly empty struct. Named nodes with bubble
131 // captures don't count - they still need Node because we're capturing the matched
132 // node, not the struct.
133 //
134 // For FieldExpr, look through to the value. The parser treats `field: expr @cap` as
135 // `(field: expr) @cap` so that quantifiers work on fields (e.g., `decorator: (x)*`
136 // for repeating fields). This means captures wrap the FieldExpr, but the value
137 // determines whether it produces a structured type. See `parse_expr_no_suffix`.
138 let creates_structured_scope = inner.and_then(unwrap_field_value).is_some_and(|ei| {
139 // Truly empty scopes (like `{ }`) produce empty struct
140 if is_truly_empty_scope(&ei) {
141 return true;
142 }
143 if !inner_creates_scope(&ei) {
144 return false;
145 }
146 let Some(info) = self.ctx.type_ctx.get_term_info(&ei) else {
147 return false;
148 };
149 info.flow
150 .type_id()
151 .and_then(|id| self.ctx.type_ctx.get_type(id))
152 .is_some_and(|shape| matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_)))
153 });
154
155 if !is_structured_ref && !creates_structured_scope && !is_array {
156 let effect = if cap.has_string_annotation() {
157 EffectIR::text()
158 } else {
159 EffectIR::node()
160 };
161 effects.push(effect);
162 }
163
164 // Add Set effect if we have a capture name.
165 // Always look up in the current scope - bubble captures don't create new scopes,
166 // so all fields (including nested bubble captures) reference the same root struct.
167 if let Some(name_token) = cap.name() {
168 let capture_name = &name_token.text()[1..]; // Strip @ prefix
169 let member_ref = self.lookup_member_in_scope(capture_name);
170 if let Some(member_ref) = member_ref {
171 effects.push(EffectIR::with_member(EffectOpcode::Set, member_ref));
172 }
173 }
174
175 effects
176 }
177
178 /// Check if a quantifier body needs Node effect before Push.
179 ///
180 /// For scalar array elements (Node/String types), we need [Node/Text, Push]
181 /// to capture the matched node value.
182 /// For structured elements (Struct/Enum), EndObj/EndEnum provides the value.
183 /// For refs returning structured types, Call provides the value.
184 pub(super) fn quantifier_needs_node_for_push(&self, expr: &Expr) -> bool {
185 let Expr::QuantifiedExpr(quant) = expr else {
186 return true;
187 };
188 let Some(inner) = quant.inner() else {
189 return true;
190 };
191
192 // Refs returning structured types don't need Node
193 if self.is_ref_returning_structured(&inner) {
194 return false;
195 }
196
197 // Check the actual inferred type, not syntax
198 let Some(info) = self.ctx.type_ctx.get_term_info(&inner) else {
199 return true;
200 };
201
202 // If type is Struct or Enum, EndObj/EndEnum produces the value
203 // Otherwise (Node, String, Void, etc.), we need Node effect
204 !info
205 .flow
206 .type_id()
207 .and_then(|id| self.ctx.type_ctx.get_type(id))
208 .is_some_and(|shape| matches!(shape, TypeShape::Struct(_) | TypeShape::Enum(_)))
209 }
210
211 /// Check if expr is (or wraps) a ref returning a structured type.
212 ///
213 /// For such refs, we skip the Node effect in captures - the Call leaves
214 /// the structured result (Enum/Struct/Array) pending for Set to consume.
215 pub(super) fn is_ref_returning_structured(&self, expr: &Expr) -> bool {
216 match expr {
217 Expr::Ref(r) => self.ref_returns_structured(r),
218 Expr::QuantifiedExpr(q) => q
219 .inner()
220 .is_some_and(|i| self.is_ref_returning_structured(&i)),
221 Expr::CapturedExpr(c) => c
222 .inner()
223 .is_some_and(|i| self.is_ref_returning_structured(&i)),
224 Expr::FieldExpr(f) => f
225 .value()
226 .is_some_and(|v| self.is_ref_returning_structured(&v)),
227 _ => false,
228 }
229 }
230
231 /// Check if a Ref points to a definition returning a structured type.
232 ///
233 /// All refs now use Call/Return. If the definition returns a structured type
234 /// (Enum/Struct/Array), Return leaves that result pending for Set to consume.
235 /// In this case, we skip emitting Node/Text effects in captures.
236 fn ref_returns_structured(&self, r: &ast::Ref) -> bool {
237 r.name()
238 .and_then(|name| self.ctx.type_ctx.get_def_id(self.ctx.interner, name.text()))
239 .and_then(|def_id| self.ctx.type_ctx.get_def_type(def_id))
240 .and_then(|def_type| self.ctx.type_ctx.get_type(def_type))
241 .is_some_and(|shape| {
242 matches!(
243 shape,
244 TypeShape::Struct(_) | TypeShape::Enum(_) | TypeShape::Array { .. }
245 )
246 })
247 }
248
249 /// Collect all capture names from an expression recursively.
250 pub(super) fn collect_captures(expr: &Expr) -> HashSet<String> {
251 fn collect(expr: &Expr, names: &mut HashSet<String>) {
252 if let Expr::CapturedExpr(cap) = expr
253 && let Some(name) = cap.name()
254 {
255 names.insert(name.text()[1..].to_string()); // Strip @ prefix
256 }
257 for child in expr.children() {
258 collect(&child, names);
259 }
260 }
261 let mut names = HashSet::new();
262 collect(expr, &mut names);
263 names
264 }
265}
266
267/// Unwrap FieldExpr to get its value, pass through other expressions.
268///
269/// Used when checking properties of a captured expression - captures on fields
270/// like `field: [A: B:] @cap` wrap the FieldExpr, but we need to inspect the value.
271fn unwrap_field_value(expr: &Expr) -> Option<Expr> {
272 match expr {
273 Expr::FieldExpr(f) => f.value(),
274 other => Some(other.clone()),
275 }
276}
277
278/// Check if inner needs struct wrapper for array iterations.
279///
280/// Returns true when the inner expression produces a Struct type (bubbling fields).
281/// This includes:
282/// - Sequences/alternations with captures: `{(a) @x (b) @y}*`
283/// - Named nodes with bubble captures: `(node (child) @x)*`
284///
285/// Enums use Enum/EndEnum instead (handled separately).
286pub fn check_needs_struct_wrapper(inner: &Expr, type_ctx: &TypeContext) -> bool {
287 let Some(info) = type_ctx.get_term_info(inner) else {
288 return false;
289 };
290
291 // Must be a bubble (fields flow to parent scope)
292 if !info.flow.is_bubble() {
293 return false;
294 }
295
296 // Check the actual type - if it's a Struct, we need Obj/EndObj wrapper
297 info.flow
298 .type_id()
299 .and_then(|id| type_ctx.get_type(id))
300 .is_some_and(|shape| matches!(shape, TypeShape::Struct(_)))
301}
302
303/// Get row type ID for array element scoping.
304pub fn get_row_type_id(inner: &Expr, type_ctx: &TypeContext) -> Option<TypeId> {
305 type_ctx
306 .get_term_info(inner)
307 .and_then(|info| info.flow.type_id())
308}