dcbor_pattern/pattern/structure/array_pattern/mod.rs
1use std::ops::RangeBounds;
2
3use dcbor::prelude::*;
4
5use crate::{
6 Interval,
7 pattern::{
8 Matcher, MetaPattern, Path, Pattern,
9 meta::{RepeatPattern, SequencePattern},
10 vm::Instr,
11 },
12};
13
14mod assigner;
15mod backtrack;
16mod helpers;
17
18use assigner::SequenceAssigner;
19use helpers::*;
20
21/// Pattern for matching CBOR array structures.
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub enum ArrayPattern {
24 /// Matches any array.
25 Any,
26 /// Matches arrays with elements that match the given pattern.
27 Elements(Box<Pattern>),
28 /// Matches arrays with length in the given interval.
29 Length(Interval),
30}
31
32impl ArrayPattern {
33 /// Creates a new `ArrayPattern` that matches any array.
34 pub fn any() -> Self { ArrayPattern::Any }
35
36 /// Creates a new `ArrayPattern` that matches arrays with elements
37 /// that match the given pattern.
38 pub fn with_elements(pattern: Pattern) -> Self {
39 ArrayPattern::Elements(Box::new(pattern))
40 }
41
42 pub fn with_length_range<R: RangeBounds<usize>>(range: R) -> Self {
43 ArrayPattern::Length(Interval::new(range))
44 }
45
46 /// Creates a new `ArrayPattern` that matches arrays with length in the
47 /// given range.
48 pub fn with_length_interval(interval: Interval) -> Self {
49 ArrayPattern::Length(interval)
50 }
51
52 /// Match a complex sequence against array elements using VM-based matching.
53 /// This handles patterns with repeats and other complex constructs that
54 /// require backtracking and proper quantifier evaluation.
55 fn match_complex_sequence(
56 &self,
57 cbor: &CBOR,
58 pattern: &Pattern,
59 ) -> Vec<Path> {
60 // For complex sequences containing repeats, we need to check if the
61 // pattern can match the array elements in sequence.
62 // The key insight is that a sequence pattern like
63 // `(*)*, 42, (*)*` should match if there's any way to
64 // arrange the array elements to satisfy the sequence
65 // requirements.
66
67 match cbor.as_case() {
68 CBORCase::Array(arr) => {
69 // Create a synthetic "element sequence" CBOR value to match
70 // against This represents the array elements as
71 // a sequence that the pattern can evaluate
72
73 // For sequences with repeats, we need to check if the pattern
74 // can be satisfied by the array elements in order
75 let can_match =
76 self.can_match_sequence_against_array(pattern, arr);
77 let result = if can_match {
78 vec![vec![cbor.clone()]]
79 } else {
80 vec![]
81 };
82 result
83 }
84 _ => {
85 vec![] // Not an array
86 }
87 }
88 }
89
90 /// Check if a sequence pattern can match against array elements.
91 /// This implements the core logic for matching patterns like
92 /// `(*)*, 42, (*)*` against array elements.
93 fn can_match_sequence_against_array(
94 &self,
95 pattern: &Pattern,
96 arr: &[CBOR],
97 ) -> bool {
98 match pattern {
99 Pattern::Meta(MetaPattern::Sequence(seq_pattern)) => {
100 self.match_sequence_patterns_against_array(seq_pattern, arr)
101 }
102 Pattern::Meta(MetaPattern::Repeat(repeat_pattern)) => {
103 // Single repeat pattern: check if it can consume all array
104 // elements
105 self.match_repeat_pattern_against_array(repeat_pattern, arr)
106 }
107 _ => {
108 // For non-sequence patterns, fall back to simple matching
109 let array_cbor = arr.to_cbor();
110 pattern.matches(&array_cbor)
111 }
112 }
113 }
114
115 /// Match a sequence of patterns against array elements.
116 /// This is the core algorithm for handling sequences with repeats.
117 fn match_sequence_patterns_against_array(
118 &self,
119 seq_pattern: &SequencePattern,
120 arr: &[CBOR],
121 ) -> bool {
122 let patterns = seq_pattern.patterns();
123 let assigner = SequenceAssigner::new(patterns, arr);
124 assigner.can_match()
125 }
126
127 /// Match a single repeat pattern against array elements.
128 fn match_repeat_pattern_against_array(
129 &self,
130 repeat_pattern: &RepeatPattern,
131 arr: &[CBOR],
132 ) -> bool {
133 let quantifier = repeat_pattern.quantifier();
134 let min_count = quantifier.min();
135 let max_count = quantifier.max().unwrap_or(arr.len());
136
137 // Check if the array length is within the valid range for this repeat
138 if arr.len() < min_count || arr.len() > max_count {
139 return false;
140 }
141
142 // Check if all elements match the repeated pattern
143 arr.iter()
144 .all(|element| repeat_pattern.pattern().matches(element))
145 }
146
147 /// Handle sequence patterns with captures by manually matching elements
148 /// and collecting captures with proper array context.
149 fn handle_sequence_captures(
150 &self,
151 seq_pattern: &SequencePattern,
152 array_cbor: &CBOR,
153 arr: &[CBOR],
154 ) -> (Vec<Path>, std::collections::HashMap<String, Vec<Path>>) {
155 // Use the existing sequence matching logic to find element assignments
156 if let Some(assignments) =
157 self.find_sequence_element_assignments(seq_pattern, arr)
158 {
159 let mut all_captures = std::collections::HashMap::new();
160
161 // Process each pattern and its assigned elements
162 for (pattern_idx, pattern) in
163 seq_pattern.patterns().iter().enumerate()
164 {
165 // Check if this is a capture pattern containing a repeat
166 // pattern
167 if let Pattern::Meta(crate::pattern::MetaPattern::Capture(
168 capture_pattern,
169 )) = pattern
170 {
171 // Check if the capture contains a repeat pattern
172 if extract_capture_with_repeat(pattern).is_some() {
173 // This is a capture pattern with a repeat (like
174 // @rest((*)*)) We need to
175 // capture the sub-array of matched elements
176 let captured_elements: Vec<CBOR> = assignments
177 .iter()
178 .filter_map(|&(p_idx, e_idx)| {
179 if p_idx == pattern_idx {
180 Some(arr[e_idx].clone())
181 } else {
182 None
183 }
184 })
185 .collect();
186
187 // Create a sub-array from the captured elements
188 let sub_array = captured_elements.to_cbor();
189
190 // For capture patterns, we directly capture the
191 // sub-array with the capture name
192 let capture_name = capture_pattern.name().to_string();
193 let array_context_path =
194 build_simple_array_context_path(
195 array_cbor, &sub_array,
196 );
197
198 all_captures
199 .entry(capture_name.clone())
200 .or_insert_with(Vec::new)
201 .push(array_context_path);
202
203 continue;
204 }
205 }
206 // Check if this is a direct repeat pattern that might capture
207 // multiple elements
208 else if is_repeat_pattern(pattern) {
209 if let Pattern::Meta(crate::pattern::MetaPattern::Repeat(
210 repeat_pattern,
211 )) = pattern
212 {
213 // For repeat patterns, check if they have captures
214 let mut repeat_capture_names = Vec::new();
215 repeat_pattern
216 .collect_capture_names(&mut repeat_capture_names);
217
218 if !repeat_capture_names.is_empty() {
219 // This is a repeat pattern with captures (like
220 // @rest((*)*))
221 // We need to capture the sub-array of matched
222 // elements
223 let captured_elements: Vec<CBOR> = assignments
224 .iter()
225 .filter_map(|&(p_idx, e_idx)| {
226 if p_idx == pattern_idx {
227 Some(arr[e_idx].clone())
228 } else {
229 None
230 }
231 })
232 .collect();
233
234 // Create a sub-array from the captured elements
235 let sub_array = captured_elements.to_cbor();
236
237 // Get captures from the repeat pattern against the
238 // sub-array
239 let (_sub_paths, sub_captures) =
240 repeat_pattern.paths_with_captures(&sub_array);
241
242 // Transform captures to include array context
243 transform_captures_with_array_context(
244 array_cbor,
245 &sub_array,
246 sub_captures,
247 &mut all_captures,
248 );
249 continue;
250 }
251 }
252 }
253
254 // For non-repeat patterns or repeat patterns without captures,
255 // process each assigned element individually
256 for element_idx in
257 assignments.iter().filter_map(|&(p_idx, e_idx)| {
258 if p_idx == pattern_idx {
259 Some(e_idx)
260 } else {
261 None
262 }
263 })
264 {
265 let element = &arr[element_idx];
266
267 // Get captures from this pattern matching this element
268 let (_element_paths, element_captures) =
269 pattern.paths_with_captures(element);
270
271 // Transform captures to include array context
272 transform_captures_with_array_context(
273 array_cbor,
274 element,
275 element_captures,
276 &mut all_captures,
277 );
278 }
279 }
280
281 // Return the array path and all captures
282 (vec![vec![array_cbor.clone()]], all_captures)
283 } else {
284 // Sequence doesn't match the array
285 (vec![], std::collections::HashMap::new())
286 }
287 }
288
289 /// Find which array elements are assigned to which sequence patterns.
290 /// Returns a vector of (pattern_index, element_index) pairs if the sequence
291 /// matches.
292 fn find_sequence_element_assignments(
293 &self,
294 seq_pattern: &SequencePattern,
295 arr: &[CBOR],
296 ) -> Option<Vec<(usize, usize)>> {
297 let patterns = seq_pattern.patterns();
298 let assigner = SequenceAssigner::new(patterns, arr);
299 assigner.find_assignments()
300 }
301}
302
303impl Matcher for ArrayPattern {
304 fn paths(&self, haystack: &CBOR) -> Vec<Path> {
305 // First check if this is an array
306 match haystack.as_case() {
307 CBORCase::Array(arr) => {
308 match self {
309 ArrayPattern::Any => {
310 // Match any array - return the array itself
311 vec![vec![haystack.clone()]]
312 }
313 ArrayPattern::Elements(pattern) => {
314 // For unified syntax, the pattern should match against
315 // the array elements
316 // as a sequence, not against any individual element.
317 //
318 // Examples:
319 // - [42] should match [42] but not [1, 42, 3]
320 // - ["a" , "b"] should match ["a", "b"] but not ["a",
321 // "x", "b"]
322
323 // Check if this is a simple single-element case
324 use crate::pattern::{MetaPattern, Pattern};
325
326 match pattern.as_ref() {
327 // Simple case: single pattern should match array
328 // with exactly one element
329 Pattern::Value(_)
330 | Pattern::Structure(_)
331 | Pattern::Meta(MetaPattern::Any(_)) => {
332 if arr.len() == 1 {
333 if pattern.matches(&arr[0]) {
334 vec![vec![haystack.clone()]]
335 } else {
336 vec![]
337 }
338 } else {
339 vec![]
340 }
341 }
342
343 // Complex case: sequences, repeats, etc.
344 Pattern::Meta(MetaPattern::Sequence(
345 seq_pattern,
346 )) => {
347 let patterns = seq_pattern.patterns();
348
349 // Check if this sequence contains any repeat
350 // patterns that require VM-based matching
351 let has_repeat_patterns =
352 has_repeat_patterns_in_slice(patterns);
353
354 if has_repeat_patterns {
355 // Use VM-based matching for complex
356 // sequences
357 let result = self.match_complex_sequence(
358 haystack, pattern,
359 );
360 result
361 } else {
362 // Simple sequence: match each pattern
363 // against consecutive elements
364 if patterns.len() == arr.len() {
365 // Check if each pattern matches the
366 // corresponding array element
367 for (i, element_pattern) in
368 patterns.iter().enumerate()
369 {
370 if !element_pattern.matches(&arr[i])
371 {
372 return vec![];
373 }
374 }
375 vec![vec![haystack.clone()]]
376 } else {
377 vec![]
378 }
379 }
380 }
381
382 // For individual repeat patterns
383 Pattern::Meta(MetaPattern::Repeat(_)) => {
384 // Use VM-based matching for repeat patterns
385 self.match_complex_sequence(haystack, pattern)
386 }
387
388 // For other meta patterns, handle them properly
389 Pattern::Meta(MetaPattern::Capture(
390 capture_pattern,
391 )) => {
392 // Capture patterns should search within array
393 // elements
394 // (This is different from non-capture patterns)
395 let has_matching_element =
396 arr.iter().any(|element| {
397 capture_pattern
398 .pattern()
399 .matches(element)
400 });
401
402 if has_matching_element {
403 vec![vec![haystack.clone()]]
404 } else {
405 vec![]
406 }
407 }
408
409 // For other meta patterns (or, and, etc.), use the
410 // old heuristic
411 // This handles cases like `[(number | text)]`
412 _ => {
413 // Check if the pattern matches the array as a
414 // whole sequence
415 // For now, use a heuristic: if it's a simple
416 // meta pattern,
417 // apply it to each element and require at least
418 // one match
419 // This is not perfect but maintains some
420 // compatibility
421 let mut result = Vec::new();
422 for element in arr {
423 if pattern.matches(element) {
424 result.push(vec![haystack.clone()]);
425 break;
426 }
427 }
428 result
429 }
430 }
431 }
432 ArrayPattern::Length(interval) => {
433 if interval.contains(arr.len()) {
434 vec![vec![haystack.clone()]]
435 } else {
436 vec![]
437 }
438 }
439 }
440 }
441 _ => {
442 // Not an array, no match
443 vec![]
444 }
445 }
446 }
447
448 fn compile(
449 &self,
450 code: &mut Vec<Instr>,
451 literals: &mut Vec<Pattern>,
452 captures: &mut Vec<String>,
453 ) {
454 // Collect capture names from inner patterns
455 self.collect_capture_names(captures);
456
457 // Check if this pattern has captures
458 let mut capture_names = Vec::new();
459 self.collect_capture_names(&mut capture_names);
460
461 if capture_names.is_empty() {
462 // No captures, use the simple MatchStructure approach
463 let idx = literals.len();
464 literals.push(Pattern::Structure(
465 crate::pattern::StructurePattern::Array(self.clone()),
466 ));
467 code.push(Instr::MatchStructure(idx));
468 } else {
469 // Has captures, compile to VM navigation instructions
470 match self {
471 ArrayPattern::Elements(pattern) => {
472 // First check that we have an array
473 let array_check_idx = literals.len();
474 literals.push(Pattern::Structure(
475 crate::pattern::StructurePattern::Array(
476 ArrayPattern::Any,
477 ),
478 ));
479 code.push(Instr::MatchStructure(array_check_idx));
480
481 // Navigate to array elements
482 code.push(Instr::PushAxis(
483 crate::pattern::vm::Axis::ArrayElement,
484 ));
485
486 // Compile the inner pattern with captures
487 pattern.compile(code, literals, captures);
488
489 // Pop back to array level
490 code.push(Instr::Pop);
491 }
492 _ => {
493 // Other array patterns (length-based) don't support
494 // captures in this context Fall back to
495 // MatchStructure
496 let idx = literals.len();
497 literals.push(Pattern::Structure(
498 crate::pattern::StructurePattern::Array(self.clone()),
499 ));
500 code.push(Instr::MatchStructure(idx));
501 }
502 }
503 }
504 }
505
506 fn collect_capture_names(&self, names: &mut Vec<String>) {
507 match self {
508 ArrayPattern::Any => {
509 // No captures in a simple any pattern
510 }
511 ArrayPattern::Elements(pattern) => {
512 // Collect captures from the element pattern
513 pattern.collect_capture_names(names);
514 }
515 ArrayPattern::Length(_) => {
516 // No captures in length range patterns
517 }
518 }
519 }
520
521 fn paths_with_captures(
522 &self,
523 cbor: &CBOR,
524 ) -> (Vec<Path>, std::collections::HashMap<String, Vec<Path>>) {
525 // For simple cases that never have captures, use the fast path
526 match self {
527 ArrayPattern::Any | ArrayPattern::Length(_) => {
528 return (self.paths(cbor), std::collections::HashMap::new());
529 }
530 ArrayPattern::Elements(pattern) => {
531 // Check if this specific pattern has any captures
532 let mut capture_names = Vec::new();
533 pattern.collect_capture_names(&mut capture_names);
534
535 if capture_names.is_empty() {
536 // No captures in the element pattern, use the fast path
537 return (
538 self.paths(cbor),
539 std::collections::HashMap::new(),
540 );
541 }
542
543 // Has captures, continue with complex logic below
544 }
545 }
546
547 match cbor.as_case() {
548 CBORCase::Array(_arr) => {
549 if let ArrayPattern::Elements(pattern) = self {
550 // First check if this array pattern matches at all
551 if self.paths(cbor).is_empty() {
552 return (vec![], std::collections::HashMap::new());
553 }
554
555 // For patterns with captures, we need special handling
556 // depending on the inner pattern type
557 match pattern.as_ref() {
558 Pattern::Meta(
559 crate::pattern::MetaPattern::Sequence(seq_pattern),
560 ) => {
561 // Special handling for SequencePattern with
562 // captures
563 self.handle_sequence_captures(
564 seq_pattern,
565 cbor,
566 _arr,
567 )
568 }
569 Pattern::Meta(
570 crate::pattern::MetaPattern::Capture(
571 _capture_pattern,
572 ),
573 ) => {
574 // For capture patterns like [@item(number)] or
575 // [@item(42)],
576 // use the VM approach for consistency with existing
577 // behavior
578
579 // Use the VM approach for consistent behavior
580 let mut code = Vec::new();
581 let mut literals = Vec::new();
582 let mut captures_list = Vec::new();
583
584 // Compile the entire ArrayPattern (not just the
585 // inner pattern)
586 let array_pattern = Pattern::Structure(
587 crate::pattern::StructurePattern::Array(
588 self.clone(),
589 ),
590 );
591 array_pattern.compile(
592 &mut code,
593 &mut literals,
594 &mut captures_list,
595 );
596 code.push(crate::pattern::vm::Instr::Accept);
597
598 let program = crate::pattern::vm::Program {
599 code,
600 literals,
601 capture_names: captures_list,
602 };
603
604 // Run the VM program against the CBOR
605 crate::pattern::vm::run(&program, cbor)
606 }
607 _ => {
608 // For non-sequence patterns, use the original VM
609 // approach
610 // but start with the main Pattern's VM compilation
611 // for better compatibility
612 let mut code = Vec::new();
613 let mut literals = Vec::new();
614 let mut captures = Vec::new();
615
616 // Compile the entire ArrayPattern (not just the
617 // inner pattern)
618 let array_pattern = Pattern::Structure(
619 crate::pattern::StructurePattern::Array(
620 self.clone(),
621 ),
622 );
623 array_pattern.compile(
624 &mut code,
625 &mut literals,
626 &mut captures,
627 );
628 code.push(crate::pattern::vm::Instr::Accept);
629
630 let program = crate::pattern::vm::Program {
631 code,
632 literals,
633 capture_names: captures,
634 };
635
636 // Run the VM program against the CBOR
637 crate::pattern::vm::run(&program, cbor)
638 }
639 }
640 } else {
641 // Other array patterns (length-based) don't have inner
642 // patterns with captures
643 (self.paths(cbor), std::collections::HashMap::new())
644 }
645 }
646 _ => {
647 // Not an array, no match
648 (vec![], std::collections::HashMap::new())
649 }
650 }
651 }
652}
653
654impl std::fmt::Display for ArrayPattern {
655 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
656 match self {
657 ArrayPattern::Any => write!(f, "array"),
658 ArrayPattern::Elements(pattern) => {
659 let formatted_pattern = format_array_element_pattern(pattern);
660 write!(f, "[{}]", formatted_pattern)
661 }
662 ArrayPattern::Length(interval) => {
663 write!(f, "[{}]", interval)
664 }
665 }
666 }
667}