dcbor_pattern/pattern/structure/array_pattern/mod.rs
1use std::ops::RangeBounds;
2
3use dcbor::prelude::*;
4
5use crate::{
6 Interval,
7 pattern::{
8 Matcher, MetaPattern, Path, Pattern,
9 meta::{RepeatPattern, SequencePattern},
10 vm::Instr,
11 },
12};
13
14mod assigner;
15mod backtrack;
16mod helpers;
17
18use assigner::SequenceAssigner;
19use helpers::*;
20
21/// Pattern for matching CBOR array structures.
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub enum ArrayPattern {
24 /// Matches any array.
25 Any,
26 /// Matches arrays with elements that match the given pattern.
27 Elements(Box<Pattern>),
28 /// Matches arrays with length in the given interval.
29 Length(Interval),
30}
31
32impl ArrayPattern {
33 /// Creates a new `ArrayPattern` that matches any array.
34 pub fn any() -> Self { ArrayPattern::Any }
35
36 /// Creates a new `ArrayPattern` that matches arrays with elements
37 /// that match the given pattern.
38 pub fn with_elements(pattern: Pattern) -> Self {
39 ArrayPattern::Elements(Box::new(pattern))
40 }
41
42 pub fn with_length_range<R: RangeBounds<usize>>(range: R) -> Self {
43 ArrayPattern::Length(Interval::new(range))
44 }
45
46 /// Creates a new `ArrayPattern` that matches arrays with length in the
47 /// given range.
48 pub fn with_length_interval(interval: Interval) -> Self {
49 ArrayPattern::Length(interval)
50 }
51
52 /// Match a complex sequence against array elements using VM-based matching.
53 /// This handles patterns with repeats and other complex constructs that
54 /// require backtracking and proper quantifier evaluation.
55 fn match_complex_sequence(
56 &self,
57 cbor: &CBOR,
58 pattern: &Pattern,
59 ) -> Vec<Path> {
60 // For complex sequences containing repeats, we need to check if the
61 // pattern can match the array elements in sequence.
62 // The key insight is that a sequence pattern like
63 // `(*)*, 42, (*)*` should match if there's any way to
64 // arrange the array elements to satisfy the sequence
65 // requirements.
66
67 match cbor.as_case() {
68 CBORCase::Array(arr) => {
69 // Create a synthetic "element sequence" CBOR value to match
70 // against This represents the array elements as
71 // a sequence that the pattern can evaluate
72
73 // For sequences with repeats, we need to check if the pattern
74 // can be satisfied by the array elements in order
75 let can_match =
76 self.can_match_sequence_against_array(pattern, arr);
77
78 if can_match {
79 vec![vec![cbor.clone()]]
80 } else {
81 vec![]
82 }
83 }
84 _ => {
85 vec![] // Not an array
86 }
87 }
88 }
89
90 /// Check if a sequence pattern can match against array elements.
91 /// This implements the core logic for matching patterns like
92 /// `(*)*, 42, (*)*` against array elements.
93 fn can_match_sequence_against_array(
94 &self,
95 pattern: &Pattern,
96 arr: &[CBOR],
97 ) -> bool {
98 match pattern {
99 Pattern::Meta(MetaPattern::Sequence(seq_pattern)) => {
100 self.match_sequence_patterns_against_array(seq_pattern, arr)
101 }
102 Pattern::Meta(MetaPattern::Repeat(repeat_pattern)) => {
103 // Single repeat pattern: check if it can consume all array
104 // elements
105 self.match_repeat_pattern_against_array(repeat_pattern, arr)
106 }
107 _ => {
108 // For non-sequence patterns, fall back to simple matching
109 let array_cbor = arr.to_cbor();
110 pattern.matches(&array_cbor)
111 }
112 }
113 }
114
115 /// Match a sequence of patterns against array elements.
116 /// This is the core algorithm for handling sequences with repeats.
117 fn match_sequence_patterns_against_array(
118 &self,
119 seq_pattern: &SequencePattern,
120 arr: &[CBOR],
121 ) -> bool {
122 let patterns = seq_pattern.patterns();
123 let assigner = SequenceAssigner::new(patterns, arr);
124 assigner.can_match()
125 }
126
127 /// Match a single repeat pattern against array elements.
128 fn match_repeat_pattern_against_array(
129 &self,
130 repeat_pattern: &RepeatPattern,
131 arr: &[CBOR],
132 ) -> bool {
133 let quantifier = repeat_pattern.quantifier();
134 let min_count = quantifier.min();
135 let max_count = quantifier.max().unwrap_or(arr.len());
136
137 // Check if the array length is within the valid range for this repeat
138 if arr.len() < min_count || arr.len() > max_count {
139 return false;
140 }
141
142 // Check if all elements match the repeated pattern
143 arr.iter()
144 .all(|element| repeat_pattern.pattern().matches(element))
145 }
146
147 /// Handle sequence patterns with captures by manually matching elements
148 /// and collecting captures with proper array context.
149 fn handle_sequence_captures(
150 &self,
151 seq_pattern: &SequencePattern,
152 array_cbor: &CBOR,
153 arr: &[CBOR],
154 ) -> (Vec<Path>, std::collections::HashMap<String, Vec<Path>>) {
155 // Use the existing sequence matching logic to find element assignments
156 if let Some(assignments) =
157 self.find_sequence_element_assignments(seq_pattern, arr)
158 {
159 let mut all_captures = std::collections::HashMap::new();
160
161 // Process each pattern and its assigned elements
162 for (pattern_idx, pattern) in
163 seq_pattern.patterns().iter().enumerate()
164 {
165 // Check if this is a capture pattern containing a repeat
166 // pattern
167 if let Pattern::Meta(crate::pattern::MetaPattern::Capture(
168 capture_pattern,
169 )) = pattern
170 {
171 // Check if the capture contains a repeat pattern
172 if extract_capture_with_repeat(pattern).is_some() {
173 // This is a capture pattern with a repeat (like
174 // @rest((*)*)) We need to
175 // capture the sub-array of matched elements
176 let captured_elements: Vec<CBOR> = assignments
177 .iter()
178 .filter_map(|&(p_idx, e_idx)| {
179 if p_idx == pattern_idx {
180 Some(arr[e_idx].clone())
181 } else {
182 None
183 }
184 })
185 .collect();
186
187 // Create a sub-array from the captured elements
188 let sub_array = captured_elements.to_cbor();
189
190 // For capture patterns, we directly capture the
191 // sub-array with the capture name
192 let capture_name = capture_pattern.name().to_string();
193 let array_context_path =
194 build_simple_array_context_path(
195 array_cbor, &sub_array,
196 );
197
198 all_captures
199 .entry(capture_name.clone())
200 .or_insert_with(Vec::new)
201 .push(array_context_path);
202
203 continue;
204 }
205 }
206 // Check if this is a direct repeat pattern that might capture
207 // multiple elements
208 else if is_repeat_pattern(pattern)
209 && let Pattern::Meta(crate::pattern::MetaPattern::Repeat(
210 repeat_pattern,
211 )) = pattern
212 {
213 // For repeat patterns, check if they have captures
214 let mut repeat_capture_names = Vec::new();
215 repeat_pattern
216 .collect_capture_names(&mut repeat_capture_names);
217
218 if !repeat_capture_names.is_empty() {
219 // This is a repeat pattern with captures (like
220 // @rest((*)*))
221 // We need to capture the sub-array of matched
222 // elements
223 let captured_elements: Vec<CBOR> = assignments
224 .iter()
225 .filter_map(|&(p_idx, e_idx)| {
226 if p_idx == pattern_idx {
227 Some(arr[e_idx].clone())
228 } else {
229 None
230 }
231 })
232 .collect();
233
234 // Create a sub-array from the captured elements
235 let sub_array = captured_elements.to_cbor();
236
237 // Get captures from the repeat pattern against the
238 // sub-array
239 let (_sub_paths, sub_captures) =
240 repeat_pattern.paths_with_captures(&sub_array);
241
242 // Transform captures to include array context
243 transform_captures_with_array_context(
244 array_cbor,
245 &sub_array,
246 sub_captures,
247 &mut all_captures,
248 );
249 continue;
250 }
251 }
252
253 // For non-repeat patterns or repeat patterns without captures,
254 // process each assigned element individually
255 for element_idx in
256 assignments.iter().filter_map(|&(p_idx, e_idx)| {
257 if p_idx == pattern_idx {
258 Some(e_idx)
259 } else {
260 None
261 }
262 })
263 {
264 let element = &arr[element_idx];
265
266 // Get captures from this pattern matching this element
267 let (_element_paths, element_captures) =
268 pattern.paths_with_captures(element);
269
270 // Transform captures to include array context
271 transform_captures_with_array_context(
272 array_cbor,
273 element,
274 element_captures,
275 &mut all_captures,
276 );
277 }
278 }
279
280 // Return the array path and all captures
281 (vec![vec![array_cbor.clone()]], all_captures)
282 } else {
283 // Sequence doesn't match the array
284 (vec![], std::collections::HashMap::new())
285 }
286 }
287
288 /// Find which array elements are assigned to which sequence patterns.
289 /// Returns a vector of (pattern_index, element_index) pairs if the sequence
290 /// matches.
291 fn find_sequence_element_assignments(
292 &self,
293 seq_pattern: &SequencePattern,
294 arr: &[CBOR],
295 ) -> Option<Vec<(usize, usize)>> {
296 let patterns = seq_pattern.patterns();
297 let assigner = SequenceAssigner::new(patterns, arr);
298 assigner.find_assignments()
299 }
300}
301
302impl Matcher for ArrayPattern {
303 fn paths(&self, haystack: &CBOR) -> Vec<Path> {
304 // First check if this is an array
305 match haystack.as_case() {
306 CBORCase::Array(arr) => {
307 match self {
308 ArrayPattern::Any => {
309 // Match any array - return the array itself
310 vec![vec![haystack.clone()]]
311 }
312 ArrayPattern::Elements(pattern) => {
313 // For unified syntax, the pattern should match against
314 // the array elements
315 // as a sequence, not against any individual element.
316 //
317 // Examples:
318 // - [42] should match [42] but not [1, 42, 3]
319 // - ["a" , "b"] should match ["a", "b"] but not ["a",
320 // "x", "b"]
321
322 // Check if this is a simple single-element case
323 use crate::pattern::{MetaPattern, Pattern};
324
325 match pattern.as_ref() {
326 // Simple case: single pattern should match array
327 // with exactly one element
328 Pattern::Value(_)
329 | Pattern::Structure(_)
330 | Pattern::Meta(MetaPattern::Any(_)) => {
331 if arr.len() == 1 {
332 if pattern.matches(&arr[0]) {
333 vec![vec![haystack.clone()]]
334 } else {
335 vec![]
336 }
337 } else {
338 vec![]
339 }
340 }
341
342 // Complex case: sequences, repeats, etc.
343 Pattern::Meta(MetaPattern::Sequence(
344 seq_pattern,
345 )) => {
346 let patterns = seq_pattern.patterns();
347
348 // Check if this sequence contains any repeat
349 // patterns that require VM-based matching
350 let has_repeat_patterns =
351 has_repeat_patterns_in_slice(patterns);
352
353 if has_repeat_patterns {
354 // Use VM-based matching for complex
355 // sequences
356
357 self.match_complex_sequence(
358 haystack, pattern,
359 )
360 } else {
361 // Simple sequence: match each pattern
362 // against consecutive elements
363 if patterns.len() == arr.len() {
364 // Check if each pattern matches the
365 // corresponding array element
366 for (i, element_pattern) in
367 patterns.iter().enumerate()
368 {
369 if !element_pattern.matches(&arr[i])
370 {
371 return vec![];
372 }
373 }
374 vec![vec![haystack.clone()]]
375 } else {
376 vec![]
377 }
378 }
379 }
380
381 // For individual repeat patterns
382 Pattern::Meta(MetaPattern::Repeat(_)) => {
383 // Use VM-based matching for repeat patterns
384 self.match_complex_sequence(haystack, pattern)
385 }
386
387 // For other meta patterns, handle them properly
388 Pattern::Meta(MetaPattern::Capture(
389 capture_pattern,
390 )) => {
391 // Capture patterns should search within array
392 // elements
393 // (This is different from non-capture patterns)
394 let has_matching_element =
395 arr.iter().any(|element| {
396 capture_pattern
397 .pattern()
398 .matches(element)
399 });
400
401 if has_matching_element {
402 vec![vec![haystack.clone()]]
403 } else {
404 vec![]
405 }
406 }
407
408 // For other meta patterns (or, and, etc.), use the
409 // old heuristic
410 // This handles cases like `[(number | text)]`
411 _ => {
412 // Check if the pattern matches the array as a
413 // whole sequence
414 // For now, use a heuristic: if it's a simple
415 // meta pattern,
416 // apply it to each element and require at least
417 // one match
418 // This is not perfect but maintains some
419 // compatibility
420 let mut result = Vec::new();
421 for element in arr {
422 if pattern.matches(element) {
423 result.push(vec![haystack.clone()]);
424 break;
425 }
426 }
427 result
428 }
429 }
430 }
431 ArrayPattern::Length(interval) => {
432 if interval.contains(arr.len()) {
433 vec![vec![haystack.clone()]]
434 } else {
435 vec![]
436 }
437 }
438 }
439 }
440 _ => {
441 // Not an array, no match
442 vec![]
443 }
444 }
445 }
446
447 fn compile(
448 &self,
449 code: &mut Vec<Instr>,
450 literals: &mut Vec<Pattern>,
451 captures: &mut Vec<String>,
452 ) {
453 // Collect capture names from inner patterns
454 self.collect_capture_names(captures);
455
456 // Check if this pattern has captures
457 let mut capture_names = Vec::new();
458 self.collect_capture_names(&mut capture_names);
459
460 if capture_names.is_empty() {
461 // No captures, use the simple MatchStructure approach
462 let idx = literals.len();
463 literals.push(Pattern::Structure(
464 crate::pattern::StructurePattern::Array(self.clone()),
465 ));
466 code.push(Instr::MatchStructure(idx));
467 } else {
468 // Has captures, compile to VM navigation instructions
469 match self {
470 ArrayPattern::Elements(pattern) => {
471 // First check that we have an array
472 let array_check_idx = literals.len();
473 literals.push(Pattern::Structure(
474 crate::pattern::StructurePattern::Array(
475 ArrayPattern::Any,
476 ),
477 ));
478 code.push(Instr::MatchStructure(array_check_idx));
479
480 // Navigate to array elements
481 code.push(Instr::PushAxis(
482 crate::pattern::vm::Axis::ArrayElement,
483 ));
484
485 // Compile the inner pattern with captures
486 pattern.compile(code, literals, captures);
487
488 // Pop back to array level
489 code.push(Instr::Pop);
490 }
491 _ => {
492 // Other array patterns (length-based) don't support
493 // captures in this context Fall back to
494 // MatchStructure
495 let idx = literals.len();
496 literals.push(Pattern::Structure(
497 crate::pattern::StructurePattern::Array(self.clone()),
498 ));
499 code.push(Instr::MatchStructure(idx));
500 }
501 }
502 }
503 }
504
505 fn collect_capture_names(&self, names: &mut Vec<String>) {
506 match self {
507 ArrayPattern::Any => {
508 // No captures in a simple any pattern
509 }
510 ArrayPattern::Elements(pattern) => {
511 // Collect captures from the element pattern
512 pattern.collect_capture_names(names);
513 }
514 ArrayPattern::Length(_) => {
515 // No captures in length range patterns
516 }
517 }
518 }
519
520 fn paths_with_captures(
521 &self,
522 cbor: &CBOR,
523 ) -> (Vec<Path>, std::collections::HashMap<String, Vec<Path>>) {
524 // For simple cases that never have captures, use the fast path
525 match self {
526 ArrayPattern::Any | ArrayPattern::Length(_) => {
527 return (self.paths(cbor), std::collections::HashMap::new());
528 }
529 ArrayPattern::Elements(pattern) => {
530 // Check if this specific pattern has any captures
531 let mut capture_names = Vec::new();
532 pattern.collect_capture_names(&mut capture_names);
533
534 if capture_names.is_empty() {
535 // No captures in the element pattern, use the fast path
536 return (
537 self.paths(cbor),
538 std::collections::HashMap::new(),
539 );
540 }
541
542 // Has captures, continue with complex logic below
543 }
544 }
545
546 match cbor.as_case() {
547 CBORCase::Array(_arr) => {
548 if let ArrayPattern::Elements(pattern) = self {
549 // First check if this array pattern matches at all
550 if self.paths(cbor).is_empty() {
551 return (vec![], std::collections::HashMap::new());
552 }
553
554 // For patterns with captures, we need special handling
555 // depending on the inner pattern type
556 match pattern.as_ref() {
557 Pattern::Meta(
558 crate::pattern::MetaPattern::Sequence(seq_pattern),
559 ) => {
560 // Special handling for SequencePattern with
561 // captures
562 self.handle_sequence_captures(
563 seq_pattern,
564 cbor,
565 _arr,
566 )
567 }
568 Pattern::Meta(
569 crate::pattern::MetaPattern::Capture(
570 _capture_pattern,
571 ),
572 ) => {
573 // For capture patterns like [@item(number)] or
574 // [@item(42)],
575 // use the VM approach for consistency with existing
576 // behavior
577
578 // Use the VM approach for consistent behavior
579 let mut code = Vec::new();
580 let mut literals = Vec::new();
581 let mut captures_list = Vec::new();
582
583 // Compile the entire ArrayPattern (not just the
584 // inner pattern)
585 let array_pattern = Pattern::Structure(
586 crate::pattern::StructurePattern::Array(
587 self.clone(),
588 ),
589 );
590 array_pattern.compile(
591 &mut code,
592 &mut literals,
593 &mut captures_list,
594 );
595 code.push(crate::pattern::vm::Instr::Accept);
596
597 let program = crate::pattern::vm::Program {
598 code,
599 literals,
600 capture_names: captures_list,
601 };
602
603 // Run the VM program against the CBOR
604 crate::pattern::vm::run(&program, cbor)
605 }
606 _ => {
607 // For non-sequence patterns, use the original VM
608 // approach
609 // but start with the main Pattern's VM compilation
610 // for better compatibility
611 let mut code = Vec::new();
612 let mut literals = Vec::new();
613 let mut captures = Vec::new();
614
615 // Compile the entire ArrayPattern (not just the
616 // inner pattern)
617 let array_pattern = Pattern::Structure(
618 crate::pattern::StructurePattern::Array(
619 self.clone(),
620 ),
621 );
622 array_pattern.compile(
623 &mut code,
624 &mut literals,
625 &mut captures,
626 );
627 code.push(crate::pattern::vm::Instr::Accept);
628
629 let program = crate::pattern::vm::Program {
630 code,
631 literals,
632 capture_names: captures,
633 };
634
635 // Run the VM program against the CBOR
636 crate::pattern::vm::run(&program, cbor)
637 }
638 }
639 } else {
640 // Other array patterns (length-based) don't have inner
641 // patterns with captures
642 (self.paths(cbor), std::collections::HashMap::new())
643 }
644 }
645 _ => {
646 // Not an array, no match
647 (vec![], std::collections::HashMap::new())
648 }
649 }
650 }
651}
652
653impl std::fmt::Display for ArrayPattern {
654 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
655 match self {
656 ArrayPattern::Any => write!(f, "array"),
657 ArrayPattern::Elements(pattern) => {
658 let formatted_pattern = format_array_element_pattern(pattern);
659 write!(f, "[{}]", formatted_pattern)
660 }
661 ArrayPattern::Length(interval) => {
662 write!(f, "[{}]", interval)
663 }
664 }
665 }
666}