1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
//! Unified quantifier compilation.
//!
//! Consolidates the 6+ code paths for quantified expression compilation into
//! a single unified approach with configuration for:
//! - Whether it's inside an array scope
//! - Whether it's skippable (first-child with Down navigation)
//! - Whether skip/match need separate exits
use crate::analyze::type_check::TypeId;
use crate::bytecode::{EffectIR, Label};
use crate::parser::SyntaxKind;
use crate::parser::ast::{self, Expr};
use plotnik_bytecode::Nav;
use super::Compiler;
use super::capture::{CaptureEffects, check_needs_struct_wrapper, get_row_type_id};
use super::navigation::is_star_or_plus_quantifier;
/// Quantifier operator classification.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum QuantifierKind {
/// `?` - matches 0 or 1 time
Optional,
/// `??` - non-greedy optional
OptionalNonGreedy,
/// `*` - matches 0 or more times
Star,
/// `*?` - non-greedy star
StarNonGreedy,
/// `+` - matches 1 or more times
Plus,
/// `+?` - non-greedy plus
PlusNonGreedy,
}
impl QuantifierKind {
/// Parse from a SyntaxKind.
pub fn from_syntax(kind: SyntaxKind) -> Option<Self> {
match kind {
SyntaxKind::Question => Some(Self::Optional),
SyntaxKind::QuestionQuestion => Some(Self::OptionalNonGreedy),
SyntaxKind::Star => Some(Self::Star),
SyntaxKind::StarQuestion => Some(Self::StarNonGreedy),
SyntaxKind::Plus => Some(Self::Plus),
SyntaxKind::PlusQuestion => Some(Self::PlusNonGreedy),
_ => None,
}
}
/// Returns true if this is a greedy quantifier.
pub fn is_greedy(self) -> bool {
matches!(self, Self::Optional | Self::Star | Self::Plus)
}
}
/// Result of parsing a quantified expression.
enum QuantifierParse {
/// No inner expression found.
Empty,
/// Inner expression exists but no valid quantifier operator.
Plain(Expr),
/// Valid quantified expression with inner and kind.
Quantified { inner: Expr, kind: QuantifierKind },
}
/// Parse a quantified expression into its components.
///
/// Returns `Empty` if no inner, `Plain` if inner but no quantifier,
/// `Quantified` if both inner and valid quantifier operator.
fn parse_quantifier(quant: &ast::QuantifiedExpr) -> QuantifierParse {
let Some(inner) = quant.inner() else {
return QuantifierParse::Empty;
};
let Some(op) = quant.operator() else {
return QuantifierParse::Plain(inner);
};
match QuantifierKind::from_syntax(op.kind()) {
Some(kind) => QuantifierParse::Quantified { inner, kind },
None => QuantifierParse::Plain(inner),
}
}
/// Configuration for unified quantifier compilation.
pub struct QuantifierConfig<'a> {
/// The inner expression to match.
pub inner: &'a Expr,
/// The quantifier kind.
pub kind: QuantifierKind,
/// Navigation for first iteration.
pub first_nav: Option<Nav>,
/// Whether this is inside an array capture context (needs Push on each iteration).
pub in_array_context: bool,
/// Capture effects for each iteration (e.g., Push for arrays).
pub element_capture: CaptureEffects,
/// When true, skip and match paths need separate exit labels.
pub needs_split_exits: bool,
/// Exit for match path (when needs_split_exits is true).
pub match_exit: Label,
/// Exit for skip path (when needs_split_exits is true). Only used for skippable quantifiers.
pub skip_exit: Option<Label>,
}
impl Compiler<'_> {
/// Compile a quantified expression with capture effects (passed to body).
pub(super) fn compile_quantified_inner(
&mut self,
quant: &ast::QuantifiedExpr,
exit: Label,
nav_override: Option<Nav>,
capture: CaptureEffects,
) -> Label {
let (inner, kind) = match parse_quantifier(quant) {
QuantifierParse::Empty => return exit,
QuantifierParse::Plain(inner) => {
return self.compile_expr_inner(&inner, exit, nav_override, capture);
}
QuantifierParse::Quantified { inner, kind } => (inner, kind),
};
// When the inner returns a structured type (enum/struct) and this is a star/plus
// quantifier without explicit capture, we still need array scope (Arr/Push/EndArr)
// because the type system expects an array of these values.
let needs_implicit_array = matches!(kind, QuantifierKind::Star | QuantifierKind::Plus)
&& self.is_ref_returning_structured(&inner);
if needs_implicit_array {
// Use array scope: Arr → quantifier with Push → EndArr → exit
// No capture effects on the array itself (no Set), just collect values
return self.compile_array_scope(
&Expr::QuantifiedExpr(quant.clone()),
exit,
nav_override,
vec![], // No capture effects (no @name to set)
capture,
false, // Not a string capture
);
}
let config = QuantifierConfig {
inner: &inner,
kind,
first_nav: nav_override,
in_array_context: false,
element_capture: capture,
needs_split_exits: false,
match_exit: exit,
skip_exit: None,
};
self.compile_quantified_unified(config)
}
/// Compile a quantified expression for array capture with element-level effects.
///
/// The element_capture effects (typically [Push]) are placed on each iteration.
pub(super) fn compile_quantified_for_array(
&mut self,
quant: &ast::QuantifiedExpr,
exit: Label,
nav_override: Option<Nav>,
element_capture: CaptureEffects,
) -> Label {
let (inner, kind) = match parse_quantifier(quant) {
QuantifierParse::Empty => return exit,
QuantifierParse::Plain(inner) => {
return self.compile_expr_inner(&inner, exit, nav_override, element_capture);
}
QuantifierParse::Quantified { inner, kind } => (inner, kind),
};
let config = QuantifierConfig {
inner: &inner,
kind,
first_nav: nav_override,
in_array_context: true,
element_capture,
needs_split_exits: false,
match_exit: exit,
skip_exit: None,
};
self.compile_quantified_unified(config)
}
/// Compile a skippable expression (optional/star) with separate exits for skip/match paths.
pub(super) fn compile_skippable_with_exits(
&mut self,
expr: &Expr,
match_exit: Label,
skip_exit: Label,
nav_override: Option<Nav>,
capture: CaptureEffects,
) -> Label {
// Handle CapturedExpr wrapping
if let Expr::CapturedExpr(cap) = expr
&& let Some(inner) = cap.inner()
{
// Array capture: need special handling with Arr/EndArr
if is_star_or_plus_quantifier(Some(&inner)) {
return self.compile_array_capture_with_exits(
cap,
&inner,
match_exit,
skip_exit,
nav_override,
capture,
);
}
// Non-array capture: build capture effects and recurse
let capture_effects = self.build_capture_effects(cap, Some(&inner));
let combined = capture.clone().with_post_values(capture_effects);
return self.compile_skippable_with_exits(
&inner,
match_exit,
skip_exit,
nav_override,
combined,
);
}
// Must be a QuantifiedExpr at this point
let Expr::QuantifiedExpr(quant) = expr else {
return self.compile_expr_inner(expr, match_exit, nav_override, capture);
};
let (inner, kind) = match parse_quantifier(quant) {
QuantifierParse::Empty => return match_exit,
QuantifierParse::Plain(inner) => {
return self.compile_expr_inner(&inner, match_exit, nav_override, capture);
}
QuantifierParse::Quantified { inner, kind } => (inner, kind),
};
// When the inner returns a structured type (enum/struct) and this is a star/plus
// quantifier without explicit capture, we still need array scope (Arr/Push/EndArr)
// with split exits for the skip/match paths.
let needs_implicit_array = matches!(kind, QuantifierKind::Star | QuantifierKind::Plus)
&& self.is_ref_returning_structured(&inner);
if needs_implicit_array {
return self.compile_implicit_array_with_exits(
quant,
match_exit,
skip_exit,
nav_override,
capture,
);
}
// Handle null injection for both passed captures and internal captures
let skip_with_null = self.emit_null_for_skip_path(skip_exit, &capture);
let skip_with_internal_null = self.emit_null_for_internal_captures(skip_with_null, &inner);
let config = QuantifierConfig {
inner: &inner,
kind,
first_nav: nav_override,
in_array_context: false,
element_capture: capture,
needs_split_exits: true,
match_exit,
skip_exit: Some(skip_with_internal_null),
};
self.compile_quantified_unified(config)
}
/// Compile an array capture (star/plus with @capture) as first-child with separate exits.
///
/// For array captures, we need:
/// - Arr step at entry
/// - Two EndArr steps: one for skip (→ skip_exit), one for match (→ match_exit)
/// - Star compiled to route skip to skip_EndArr, loop exit to match_EndArr
fn compile_array_capture_with_exits(
&mut self,
cap: &ast::CapturedExpr,
inner: &Expr,
match_exit: Label,
skip_exit: Label,
nav_override: Option<Nav>,
outer_capture: CaptureEffects,
) -> Label {
let capture_effects = self.build_capture_effects(cap, Some(inner));
// Create two EndArr steps with different continuations
let match_endarr = self.emit_endarr_step(&capture_effects, &outer_capture.post, match_exit);
let skip_endarr = self.emit_endarr_step(&capture_effects, &outer_capture.post, skip_exit);
let push_effects =
CaptureEffects::new_post(if self.quantifier_needs_node_for_push(inner) {
let node_eff = if cap.has_string_annotation() {
EffectIR::text()
} else {
EffectIR::node()
};
vec![node_eff, EffectIR::push()]
} else {
vec![EffectIR::push()]
});
let inner_entry = self.compile_star_for_array_with_exits(
inner,
match_endarr,
skip_endarr,
nav_override,
push_effects,
);
// Emit Arr step at entry (with outer pre-effects like Enum)
self.emit_arr_step(inner_entry, outer_capture.pre)
}
/// Compile an implicit array (star/plus without @capture) returning structured type,
/// as first-child with separate exits.
///
/// Like `compile_array_capture_with_exits` but without explicit capture effects.
/// Used when `(RefName)*` where RefName returns enum/struct.
fn compile_implicit_array_with_exits(
&mut self,
quant: &ast::QuantifiedExpr,
match_exit: Label,
skip_exit: Label,
nav_override: Option<Nav>,
outer_capture: CaptureEffects,
) -> Label {
// No capture effects since there's no @name
let capture_effects = vec![];
// Create two EndArr steps with different continuations
let match_endarr = self.emit_endarr_step(&capture_effects, &outer_capture.post, match_exit);
let skip_endarr = self.emit_endarr_step(&capture_effects, &outer_capture.post, skip_exit);
// Inner returns structured type, so no Node effect needed - just Push
let push_effects = CaptureEffects::new_post(vec![EffectIR::push()]);
let inner_entry = self.compile_star_for_array_with_exits(
&Expr::QuantifiedExpr(quant.clone()),
match_endarr,
skip_endarr,
nav_override,
push_effects,
);
// Emit Arr step at entry (with outer pre-effects like Enum)
self.emit_arr_step(inner_entry, outer_capture.pre)
}
/// Compile a star quantifier for array context with separate exits.
fn compile_star_for_array_with_exits(
&mut self,
expr: &Expr,
match_exit: Label,
skip_exit: Label,
nav_override: Option<Nav>,
capture: CaptureEffects,
) -> Label {
let Expr::QuantifiedExpr(quant) = expr else {
return self.compile_expr_inner(expr, match_exit, nav_override, capture);
};
let (inner, kind) = match parse_quantifier(quant) {
QuantifierParse::Empty => return match_exit,
QuantifierParse::Plain(inner) => {
return self.compile_expr_inner(&inner, match_exit, nav_override, capture);
}
QuantifierParse::Quantified { inner, kind } => (inner, kind),
};
let config = QuantifierConfig {
inner: &inner,
kind,
first_nav: nav_override,
in_array_context: true,
element_capture: capture,
needs_split_exits: true,
match_exit,
skip_exit: Some(skip_exit),
};
self.compile_quantified_unified(config)
}
/// Unified quantifier compilation.
///
/// This is the single entry point for all quantifier compilation, handling:
/// - Star (`*`), Plus (`+`), and Optional (`?`) quantifiers
/// - Greedy and non-greedy variants
/// - Array context (with struct wrappers if needed)
/// - Split exits for first-child skippable patterns
fn compile_quantified_unified(&mut self, config: QuantifierConfig<'_>) -> Label {
let QuantifierConfig {
inner,
kind,
first_nav,
in_array_context,
element_capture,
needs_split_exits,
match_exit,
skip_exit,
} = config;
// Determine if struct wrapper is needed (once, here)
let needs_struct_wrapper =
in_array_context && check_needs_struct_wrapper(inner, self.ctx.type_ctx);
let row_type_id = if in_array_context {
get_row_type_id(inner, self.ctx.type_ctx)
} else {
None
};
// Compile body helper - handles struct wrapper logic in one place
let compile_body = |this: &mut Self, nav: Option<Nav>, exit: Label| -> Label {
if needs_struct_wrapper {
this.compile_struct_for_array(inner, exit, nav, row_type_id)
} else if in_array_context {
this.compile_with_optional_scope(row_type_id, |t| {
t.compile_expr_inner(inner, exit, nav, element_capture.clone())
})
} else {
this.compile_expr_inner(inner, exit, nav, element_capture.clone())
}
};
let is_greedy = kind.is_greedy();
match kind {
QuantifierKind::Plus | QuantifierKind::PlusNonGreedy => {
// Plus with skip-retry: must match at least once
// First iteration has no exit fallback (backtrack propagates to caller)
let loop_entry = self.fresh_label();
// Compile body ONCE with Nav::StayExact (exact match at current position,
// skip-retry handles advancement if all branches fail)
let body_entry = compile_body(self, Some(Nav::StayExact), loop_entry);
// First iteration: skip-retry but NO exit (must match at least one)
let first_nav_mode = first_nav.unwrap_or(Nav::Down);
let first_iterate = self.compile_skip_retry_iteration_no_exit(
first_nav_mode,
body_entry,
is_greedy,
);
// Repeat iteration: skip-retry with exit option
let repeat_iterate =
self.compile_skip_retry_iteration(Nav::Next, body_entry, match_exit, is_greedy);
// loop_entry → [repeat_iterate, exit]
self.emit_branch_epsilon_at(loop_entry, repeat_iterate, match_exit, is_greedy);
first_iterate
}
QuantifierKind::Star | QuantifierKind::StarNonGreedy => {
if needs_split_exits {
// Star with split exits: uses skip-retry with separate exit paths
let skip = skip_exit.expect("split exits requires skip_exit");
self.compile_star_with_skip_retry_split_exits(
inner,
match_exit,
skip,
first_nav,
element_capture,
is_greedy,
needs_struct_wrapper,
row_type_id,
)
} else {
// Regular star with skip-retry:
// When pattern fails (even on descendant), retry with next sibling
let loop_entry = self.fresh_label();
// Compile body ONCE with Nav::StayExact (exact match at current position,
// skip-retry handles advancement if all branches fail)
let body_entry = compile_body(self, Some(Nav::StayExact), loop_entry);
// First iteration: Down navigation with skip-retry
let first_nav_mode = first_nav.unwrap_or(Nav::Down);
let first_iterate = self.compile_skip_retry_iteration(
first_nav_mode,
body_entry,
match_exit,
is_greedy,
);
// Repeat iteration: Next navigation with skip-retry
let repeat_iterate = self.compile_skip_retry_iteration(
Nav::Next,
body_entry,
match_exit,
is_greedy,
);
// loop_entry → [repeat_iterate, exit]
self.emit_branch_epsilon_at(loop_entry, repeat_iterate, match_exit, is_greedy);
// entry → [first_iterate, exit]
self.emit_branch_epsilon(first_iterate, match_exit, is_greedy)
}
}
QuantifierKind::Optional | QuantifierKind::OptionalNonGreedy => {
// Optional with skip-retry: matches 0 or 1 time
// Compile body with Nav::StayExact (exact match at current position)
let body_entry = compile_body(self, Some(Nav::StayExact), match_exit);
// Build exit-with-null path for when no match found
let skip_with_null = if needs_split_exits {
skip_exit.expect("split exits requires skip_exit")
} else {
let null_exit = self.emit_null_for_skip_path(match_exit, &element_capture);
self.emit_null_for_internal_captures(null_exit, inner)
};
// Skip-retry iteration:
// For Optional, both skip paths (entry-level and retry-exhaust) need null.
// - Entry-level (Down fails): skip_with_null (bypass Up, has null from caller)
// - Retry exhaust (Down succeeded but pattern fails): need Up, then null
// When needs_split_exits, create a retry exhaust path with null → match_exit.
let first_nav_mode = first_nav.unwrap_or(Nav::Down);
let iterate_exit = if needs_split_exits {
// Retry exhaust needs null injection then goes to match_exit (for Up)
let null_exit = self.emit_null_for_skip_path(match_exit, &element_capture);
self.emit_null_for_internal_captures(null_exit, inner)
} else {
skip_with_null
};
let iterate = self.compile_skip_retry_iteration(
first_nav_mode,
body_entry,
iterate_exit,
is_greedy,
);
// entry → [iterate, skip_with_null]
self.emit_branch_epsilon(iterate, skip_with_null, is_greedy)
}
}
}
/// Compile a "try-skip-retry" iteration for quantifiers.
///
/// Structure:
/// ```text
/// navigate: Match(nav, wildcard) → try
/// try: epsilon → [body, retry_or_exit]
/// retry_or_exit: epsilon → [retry_nav, exit]
/// retry_nav: Match(Nav::Next, wildcard) → try
/// ```
///
/// When the body fails (even deep inside on a descendant), we backtrack to `try`,
/// which falls through to `retry_or_exit`, advancing to the next sibling and retrying.
/// Only when siblings are exhausted do we take the exit path.
///
/// Returns the navigate label (entry point for this iteration).
fn compile_skip_retry_iteration(
&mut self,
nav: Nav,
body_entry: Label,
exit: Label,
is_greedy: bool,
) -> Label {
let try_label = self.fresh_label();
let retry_or_exit = self.fresh_label();
// retry_nav: advance and loop back to try
let retry_nav = self.fresh_label();
self.emit_wildcard_nav(retry_nav, Nav::Next, try_label);
// retry_or_exit: epsilon → [retry_nav, exit]
self.emit_branch_epsilon_at(retry_or_exit, retry_nav, exit, is_greedy);
// try: epsilon → [body, retry_or_exit]
self.emit_branch_epsilon_at(try_label, body_entry, retry_or_exit, is_greedy);
// navigate: wildcard nav → try
let navigate = self.fresh_label();
self.emit_wildcard_nav(navigate, nav, try_label);
navigate
}
/// Like `compile_skip_retry_iteration` but with no exit fallback.
///
/// Used for Plus quantifier's first iteration where at least one match is required.
/// If all siblings fail, backtrack propagates to caller (quantifier fails).
fn compile_skip_retry_iteration_no_exit(
&mut self,
nav: Nav,
body_entry: Label,
is_greedy: bool,
) -> Label {
let try_label = self.fresh_label();
// retry_nav: advance and loop back to try (no exit option)
let retry_nav = self.fresh_label();
self.emit_wildcard_nav(retry_nav, Nav::Next, try_label);
// try: epsilon → [body, retry_nav]
// If pattern fails, we advance and retry. If no more siblings,
// retry_nav's navigation fails and we backtrack to outer checkpoint.
self.emit_branch_epsilon_at(try_label, body_entry, retry_nav, is_greedy);
// navigate: wildcard nav → try
let navigate = self.fresh_label();
self.emit_wildcard_nav(navigate, nav, try_label);
navigate
}
/// Helper for star with split exits and skip-retry.
/// Skip path goes to skip_exit, match path goes to match_exit.
#[allow(clippy::too_many_arguments)]
fn compile_star_with_skip_retry_split_exits(
&mut self,
inner: &Expr,
match_exit: Label,
skip_exit: Label,
nav_override: Option<Nav>,
capture: CaptureEffects,
is_greedy: bool,
needs_struct_wrapper: bool,
row_type_id: Option<TypeId>,
) -> Label {
let loop_entry = self.fresh_label();
// Compile body ONCE with Nav::StayExact (exact match at current position,
// skip-retry handles advancement if all branches fail)
let body_entry = if needs_struct_wrapper {
self.compile_struct_for_array(inner, loop_entry, Some(Nav::StayExact), row_type_id)
} else {
self.compile_expr_inner(inner, loop_entry, Some(Nav::StayExact), capture)
};
// First iteration: skip-retry with match_exit as internal fallback.
// When retry exhausts (Down succeeded but pattern fails), we need Up via match_exit.
// The entry-level epsilon (below) handles Down failure → skip_exit (bypass Up).
let first_nav_mode = nav_override.unwrap_or(Nav::Down);
let first_iterate =
self.compile_skip_retry_iteration(first_nav_mode, body_entry, match_exit, is_greedy);
// Repeat iteration: skip-retry with match_exit as fallback
let repeat_iterate =
self.compile_skip_retry_iteration(Nav::Next, body_entry, match_exit, is_greedy);
// loop_entry → [repeat_iterate, match_exit]
self.emit_branch_epsilon_at(loop_entry, repeat_iterate, match_exit, is_greedy);
// entry → [first_iterate, skip_exit]
self.emit_branch_epsilon(first_iterate, skip_exit, is_greedy)
}
}