dcbor_pattern/pattern/structure/array_pattern/
mod.rs

1use std::ops::RangeBounds;
2
3use dcbor::prelude::*;
4
5use crate::{
6    Interval,
7    pattern::{
8        Matcher, MetaPattern, Path, Pattern,
9        meta::{RepeatPattern, SequencePattern},
10        vm::Instr,
11    },
12};
13
14mod assigner;
15mod backtrack;
16mod helpers;
17
18use assigner::SequenceAssigner;
19use helpers::*;
20
21/// Pattern for matching CBOR array structures.
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub enum ArrayPattern {
24    /// Matches any array.
25    Any,
26    /// Matches arrays with elements that match the given pattern.
27    Elements(Box<Pattern>),
28    /// Matches arrays with length in the given interval.
29    Length(Interval),
30}
31
32impl ArrayPattern {
33    /// Creates a new `ArrayPattern` that matches any array.
34    pub fn any() -> Self { ArrayPattern::Any }
35
36    /// Creates a new `ArrayPattern` that matches arrays with elements
37    /// that match the given pattern.
38    pub fn with_elements(pattern: Pattern) -> Self {
39        ArrayPattern::Elements(Box::new(pattern))
40    }
41
42    pub fn with_length_range<R: RangeBounds<usize>>(range: R) -> Self {
43        ArrayPattern::Length(Interval::new(range))
44    }
45
46    /// Creates a new `ArrayPattern` that matches arrays with length in the
47    /// given range.
48    pub fn with_length_interval(interval: Interval) -> Self {
49        ArrayPattern::Length(interval)
50    }
51
52    /// Match a complex sequence against array elements using VM-based matching.
53    /// This handles patterns with repeats and other complex constructs that
54    /// require backtracking and proper quantifier evaluation.
55    fn match_complex_sequence(
56        &self,
57        cbor: &CBOR,
58        pattern: &Pattern,
59    ) -> Vec<Path> {
60        // For complex sequences containing repeats, we need to check if the
61        // pattern can match the array elements in sequence.
62        // The key insight is that a sequence pattern like
63        // `(*)*, 42, (*)*`  should match if there's any way to
64        // arrange the array elements to satisfy the sequence
65        // requirements.
66
67        match cbor.as_case() {
68            CBORCase::Array(arr) => {
69                // Create a synthetic "element sequence" CBOR value to match
70                // against This represents the array elements as
71                // a sequence that the pattern can evaluate
72
73                // For sequences with repeats, we need to check if the pattern
74                // can be satisfied by the array elements in order
75                let can_match =
76                    self.can_match_sequence_against_array(pattern, arr);
77
78                if can_match {
79                    vec![vec![cbor.clone()]]
80                } else {
81                    vec![]
82                }
83            }
84            _ => {
85                vec![] // Not an array
86            }
87        }
88    }
89
90    /// Check if a sequence pattern can match against array elements.
91    /// This implements the core logic for matching patterns like
92    /// `(*)*, 42, (*)*` against array elements.
93    fn can_match_sequence_against_array(
94        &self,
95        pattern: &Pattern,
96        arr: &[CBOR],
97    ) -> bool {
98        match pattern {
99            Pattern::Meta(MetaPattern::Sequence(seq_pattern)) => {
100                self.match_sequence_patterns_against_array(seq_pattern, arr)
101            }
102            Pattern::Meta(MetaPattern::Repeat(repeat_pattern)) => {
103                // Single repeat pattern: check if it can consume all array
104                // elements
105                self.match_repeat_pattern_against_array(repeat_pattern, arr)
106            }
107            _ => {
108                // For non-sequence patterns, fall back to simple matching
109                let array_cbor = arr.to_cbor();
110                pattern.matches(&array_cbor)
111            }
112        }
113    }
114
115    /// Match a sequence of patterns against array elements.
116    /// This is the core algorithm for handling sequences with repeats.
117    fn match_sequence_patterns_against_array(
118        &self,
119        seq_pattern: &SequencePattern,
120        arr: &[CBOR],
121    ) -> bool {
122        let patterns = seq_pattern.patterns();
123        let assigner = SequenceAssigner::new(patterns, arr);
124        assigner.can_match()
125    }
126
127    /// Match a single repeat pattern against array elements.
128    fn match_repeat_pattern_against_array(
129        &self,
130        repeat_pattern: &RepeatPattern,
131        arr: &[CBOR],
132    ) -> bool {
133        let quantifier = repeat_pattern.quantifier();
134        let min_count = quantifier.min();
135        let max_count = quantifier.max().unwrap_or(arr.len());
136
137        // Check if the array length is within the valid range for this repeat
138        if arr.len() < min_count || arr.len() > max_count {
139            return false;
140        }
141
142        // Check if all elements match the repeated pattern
143        arr.iter()
144            .all(|element| repeat_pattern.pattern().matches(element))
145    }
146
147    /// Handle sequence patterns with captures by manually matching elements
148    /// and collecting captures with proper array context.
149    fn handle_sequence_captures(
150        &self,
151        seq_pattern: &SequencePattern,
152        array_cbor: &CBOR,
153        arr: &[CBOR],
154    ) -> (Vec<Path>, std::collections::HashMap<String, Vec<Path>>) {
155        // Use the existing sequence matching logic to find element assignments
156        if let Some(assignments) =
157            self.find_sequence_element_assignments(seq_pattern, arr)
158        {
159            let mut all_captures = std::collections::HashMap::new();
160
161            // Process each pattern and its assigned elements
162            for (pattern_idx, pattern) in
163                seq_pattern.patterns().iter().enumerate()
164            {
165                // Check if this is a capture pattern containing a repeat
166                // pattern
167                if let Pattern::Meta(crate::pattern::MetaPattern::Capture(
168                    capture_pattern,
169                )) = pattern
170                {
171                    // Check if the capture contains a repeat pattern
172                    if extract_capture_with_repeat(pattern).is_some() {
173                        // This is a capture pattern with a repeat (like
174                        // @rest((*)*)) We need to
175                        // capture the sub-array of matched elements
176                        let captured_elements: Vec<CBOR> = assignments
177                            .iter()
178                            .filter_map(|&(p_idx, e_idx)| {
179                                if p_idx == pattern_idx {
180                                    Some(arr[e_idx].clone())
181                                } else {
182                                    None
183                                }
184                            })
185                            .collect();
186
187                        // Create a sub-array from the captured elements
188                        let sub_array = captured_elements.to_cbor();
189
190                        // For capture patterns, we directly capture the
191                        // sub-array with the capture name
192                        let capture_name = capture_pattern.name().to_string();
193                        let array_context_path =
194                            build_simple_array_context_path(
195                                array_cbor, &sub_array,
196                            );
197
198                        all_captures
199                            .entry(capture_name.clone())
200                            .or_insert_with(Vec::new)
201                            .push(array_context_path);
202
203                        continue;
204                    }
205                }
206                // Check if this is a direct repeat pattern that might capture
207                // multiple elements
208                else if is_repeat_pattern(pattern)
209                    && let Pattern::Meta(crate::pattern::MetaPattern::Repeat(
210                        repeat_pattern,
211                    )) = pattern
212                {
213                    // For repeat patterns, check if they have captures
214                    let mut repeat_capture_names = Vec::new();
215                    repeat_pattern
216                        .collect_capture_names(&mut repeat_capture_names);
217
218                    if !repeat_capture_names.is_empty() {
219                        // This is a repeat pattern with captures (like
220                        // @rest((*)*))
221                        // We need to capture the sub-array of matched
222                        // elements
223                        let captured_elements: Vec<CBOR> = assignments
224                            .iter()
225                            .filter_map(|&(p_idx, e_idx)| {
226                                if p_idx == pattern_idx {
227                                    Some(arr[e_idx].clone())
228                                } else {
229                                    None
230                                }
231                            })
232                            .collect();
233
234                        // Create a sub-array from the captured elements
235                        let sub_array = captured_elements.to_cbor();
236
237                        // Get captures from the repeat pattern against the
238                        // sub-array
239                        let (_sub_paths, sub_captures) =
240                            repeat_pattern.paths_with_captures(&sub_array);
241
242                        // Transform captures to include array context
243                        transform_captures_with_array_context(
244                            array_cbor,
245                            &sub_array,
246                            sub_captures,
247                            &mut all_captures,
248                        );
249                        continue;
250                    }
251                }
252
253                // For non-repeat patterns or repeat patterns without captures,
254                // process each assigned element individually
255                for element_idx in
256                    assignments.iter().filter_map(|&(p_idx, e_idx)| {
257                        if p_idx == pattern_idx {
258                            Some(e_idx)
259                        } else {
260                            None
261                        }
262                    })
263                {
264                    let element = &arr[element_idx];
265
266                    // Get captures from this pattern matching this element
267                    let (_element_paths, element_captures) =
268                        pattern.paths_with_captures(element);
269
270                    // Transform captures to include array context
271                    transform_captures_with_array_context(
272                        array_cbor,
273                        element,
274                        element_captures,
275                        &mut all_captures,
276                    );
277                }
278            }
279
280            // Return the array path and all captures
281            (vec![vec![array_cbor.clone()]], all_captures)
282        } else {
283            // Sequence doesn't match the array
284            (vec![], std::collections::HashMap::new())
285        }
286    }
287
288    /// Find which array elements are assigned to which sequence patterns.
289    /// Returns a vector of (pattern_index, element_index) pairs if the sequence
290    /// matches.
291    fn find_sequence_element_assignments(
292        &self,
293        seq_pattern: &SequencePattern,
294        arr: &[CBOR],
295    ) -> Option<Vec<(usize, usize)>> {
296        let patterns = seq_pattern.patterns();
297        let assigner = SequenceAssigner::new(patterns, arr);
298        assigner.find_assignments()
299    }
300}
301
302impl Matcher for ArrayPattern {
303    fn paths(&self, haystack: &CBOR) -> Vec<Path> {
304        // First check if this is an array
305        match haystack.as_case() {
306            CBORCase::Array(arr) => {
307                match self {
308                    ArrayPattern::Any => {
309                        // Match any array - return the array itself
310                        vec![vec![haystack.clone()]]
311                    }
312                    ArrayPattern::Elements(pattern) => {
313                        // For unified syntax, the pattern should match against
314                        // the array elements
315                        // as a sequence, not against any individual element.
316                        //
317                        // Examples:
318                        // - [42] should match [42] but not [1, 42, 3]
319                        // - ["a" , "b"] should match ["a", "b"] but not ["a",
320                        //   "x", "b"]
321
322                        // Check if this is a simple single-element case
323                        use crate::pattern::{MetaPattern, Pattern};
324
325                        match pattern.as_ref() {
326                            // Simple case: single pattern should match array
327                            // with exactly one element
328                            Pattern::Value(_)
329                            | Pattern::Structure(_)
330                            | Pattern::Meta(MetaPattern::Any(_)) => {
331                                if arr.len() == 1 {
332                                    if pattern.matches(&arr[0]) {
333                                        vec![vec![haystack.clone()]]
334                                    } else {
335                                        vec![]
336                                    }
337                                } else {
338                                    vec![]
339                                }
340                            }
341
342                            // Complex case: sequences, repeats, etc.
343                            Pattern::Meta(MetaPattern::Sequence(
344                                seq_pattern,
345                            )) => {
346                                let patterns = seq_pattern.patterns();
347
348                                // Check if this sequence contains any repeat
349                                // patterns that require VM-based matching
350                                let has_repeat_patterns =
351                                    has_repeat_patterns_in_slice(patterns);
352
353                                if has_repeat_patterns {
354                                    // Use VM-based matching for complex
355                                    // sequences
356
357                                    self.match_complex_sequence(
358                                        haystack, pattern,
359                                    )
360                                } else {
361                                    // Simple sequence: match each pattern
362                                    // against consecutive elements
363                                    if patterns.len() == arr.len() {
364                                        // Check if each pattern matches the
365                                        // corresponding array element
366                                        for (i, element_pattern) in
367                                            patterns.iter().enumerate()
368                                        {
369                                            if !element_pattern.matches(&arr[i])
370                                            {
371                                                return vec![];
372                                            }
373                                        }
374                                        vec![vec![haystack.clone()]]
375                                    } else {
376                                        vec![]
377                                    }
378                                }
379                            }
380
381                            // For individual repeat patterns
382                            Pattern::Meta(MetaPattern::Repeat(_)) => {
383                                // Use VM-based matching for repeat patterns
384                                self.match_complex_sequence(haystack, pattern)
385                            }
386
387                            // For other meta patterns, handle them properly
388                            Pattern::Meta(MetaPattern::Capture(
389                                capture_pattern,
390                            )) => {
391                                // Capture patterns should search within array
392                                // elements
393                                // (This is different from non-capture patterns)
394                                let has_matching_element =
395                                    arr.iter().any(|element| {
396                                        capture_pattern
397                                            .pattern()
398                                            .matches(element)
399                                    });
400
401                                if has_matching_element {
402                                    vec![vec![haystack.clone()]]
403                                } else {
404                                    vec![]
405                                }
406                            }
407
408                            // For other meta patterns (or, and, etc.), use the
409                            // old heuristic
410                            // This handles cases like `[(number | text)]`
411                            _ => {
412                                // Check if the pattern matches the array as a
413                                // whole sequence
414                                // For now, use a heuristic: if it's a simple
415                                // meta pattern,
416                                // apply it to each element and require at least
417                                // one match
418                                // This is not perfect but maintains some
419                                // compatibility
420                                let mut result = Vec::new();
421                                for element in arr {
422                                    if pattern.matches(element) {
423                                        result.push(vec![haystack.clone()]);
424                                        break;
425                                    }
426                                }
427                                result
428                            }
429                        }
430                    }
431                    ArrayPattern::Length(interval) => {
432                        if interval.contains(arr.len()) {
433                            vec![vec![haystack.clone()]]
434                        } else {
435                            vec![]
436                        }
437                    }
438                }
439            }
440            _ => {
441                // Not an array, no match
442                vec![]
443            }
444        }
445    }
446
447    fn compile(
448        &self,
449        code: &mut Vec<Instr>,
450        literals: &mut Vec<Pattern>,
451        captures: &mut Vec<String>,
452    ) {
453        // Collect capture names from inner patterns
454        self.collect_capture_names(captures);
455
456        // Check if this pattern has captures
457        let mut capture_names = Vec::new();
458        self.collect_capture_names(&mut capture_names);
459
460        if capture_names.is_empty() {
461            // No captures, use the simple MatchStructure approach
462            let idx = literals.len();
463            literals.push(Pattern::Structure(
464                crate::pattern::StructurePattern::Array(self.clone()),
465            ));
466            code.push(Instr::MatchStructure(idx));
467        } else {
468            // Has captures, compile to VM navigation instructions
469            match self {
470                ArrayPattern::Elements(pattern) => {
471                    // First check that we have an array
472                    let array_check_idx = literals.len();
473                    literals.push(Pattern::Structure(
474                        crate::pattern::StructurePattern::Array(
475                            ArrayPattern::Any,
476                        ),
477                    ));
478                    code.push(Instr::MatchStructure(array_check_idx));
479
480                    // Navigate to array elements
481                    code.push(Instr::PushAxis(
482                        crate::pattern::vm::Axis::ArrayElement,
483                    ));
484
485                    // Compile the inner pattern with captures
486                    pattern.compile(code, literals, captures);
487
488                    // Pop back to array level
489                    code.push(Instr::Pop);
490                }
491                _ => {
492                    // Other array patterns (length-based) don't support
493                    // captures in this context Fall back to
494                    // MatchStructure
495                    let idx = literals.len();
496                    literals.push(Pattern::Structure(
497                        crate::pattern::StructurePattern::Array(self.clone()),
498                    ));
499                    code.push(Instr::MatchStructure(idx));
500                }
501            }
502        }
503    }
504
505    fn collect_capture_names(&self, names: &mut Vec<String>) {
506        match self {
507            ArrayPattern::Any => {
508                // No captures in a simple any pattern
509            }
510            ArrayPattern::Elements(pattern) => {
511                // Collect captures from the element pattern
512                pattern.collect_capture_names(names);
513            }
514            ArrayPattern::Length(_) => {
515                // No captures in length range patterns
516            }
517        }
518    }
519
520    fn paths_with_captures(
521        &self,
522        cbor: &CBOR,
523    ) -> (Vec<Path>, std::collections::HashMap<String, Vec<Path>>) {
524        // For simple cases that never have captures, use the fast path
525        match self {
526            ArrayPattern::Any | ArrayPattern::Length(_) => {
527                return (self.paths(cbor), std::collections::HashMap::new());
528            }
529            ArrayPattern::Elements(pattern) => {
530                // Check if this specific pattern has any captures
531                let mut capture_names = Vec::new();
532                pattern.collect_capture_names(&mut capture_names);
533
534                if capture_names.is_empty() {
535                    // No captures in the element pattern, use the fast path
536                    return (
537                        self.paths(cbor),
538                        std::collections::HashMap::new(),
539                    );
540                }
541
542                // Has captures, continue with complex logic below
543            }
544        }
545
546        match cbor.as_case() {
547            CBORCase::Array(_arr) => {
548                if let ArrayPattern::Elements(pattern) = self {
549                    // First check if this array pattern matches at all
550                    if self.paths(cbor).is_empty() {
551                        return (vec![], std::collections::HashMap::new());
552                    }
553
554                    // For patterns with captures, we need special handling
555                    // depending on the inner pattern type
556                    match pattern.as_ref() {
557                        Pattern::Meta(
558                            crate::pattern::MetaPattern::Sequence(seq_pattern),
559                        ) => {
560                            // Special handling for SequencePattern with
561                            // captures
562                            self.handle_sequence_captures(
563                                seq_pattern,
564                                cbor,
565                                _arr,
566                            )
567                        }
568                        Pattern::Meta(
569                            crate::pattern::MetaPattern::Capture(
570                                _capture_pattern,
571                            ),
572                        ) => {
573                            // For capture patterns like [@item(number)] or
574                            // [@item(42)],
575                            // use the VM approach for consistency with existing
576                            // behavior
577
578                            // Use the VM approach for consistent behavior
579                            let mut code = Vec::new();
580                            let mut literals = Vec::new();
581                            let mut captures_list = Vec::new();
582
583                            // Compile the entire ArrayPattern (not just the
584                            // inner pattern)
585                            let array_pattern = Pattern::Structure(
586                                crate::pattern::StructurePattern::Array(
587                                    self.clone(),
588                                ),
589                            );
590                            array_pattern.compile(
591                                &mut code,
592                                &mut literals,
593                                &mut captures_list,
594                            );
595                            code.push(crate::pattern::vm::Instr::Accept);
596
597                            let program = crate::pattern::vm::Program {
598                                code,
599                                literals,
600                                capture_names: captures_list,
601                            };
602
603                            // Run the VM program against the CBOR
604                            crate::pattern::vm::run(&program, cbor)
605                        }
606                        _ => {
607                            // For non-sequence patterns, use the original VM
608                            // approach
609                            // but start with the main Pattern's VM compilation
610                            // for better compatibility
611                            let mut code = Vec::new();
612                            let mut literals = Vec::new();
613                            let mut captures = Vec::new();
614
615                            // Compile the entire ArrayPattern (not just the
616                            // inner pattern)
617                            let array_pattern = Pattern::Structure(
618                                crate::pattern::StructurePattern::Array(
619                                    self.clone(),
620                                ),
621                            );
622                            array_pattern.compile(
623                                &mut code,
624                                &mut literals,
625                                &mut captures,
626                            );
627                            code.push(crate::pattern::vm::Instr::Accept);
628
629                            let program = crate::pattern::vm::Program {
630                                code,
631                                literals,
632                                capture_names: captures,
633                            };
634
635                            // Run the VM program against the CBOR
636                            crate::pattern::vm::run(&program, cbor)
637                        }
638                    }
639                } else {
640                    // Other array patterns (length-based) don't have inner
641                    // patterns with captures
642                    (self.paths(cbor), std::collections::HashMap::new())
643                }
644            }
645            _ => {
646                // Not an array, no match
647                (vec![], std::collections::HashMap::new())
648            }
649        }
650    }
651}
652
653impl std::fmt::Display for ArrayPattern {
654    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
655        match self {
656            ArrayPattern::Any => write!(f, "array"),
657            ArrayPattern::Elements(pattern) => {
658                let formatted_pattern = format_array_element_pattern(pattern);
659                write!(f, "[{}]", formatted_pattern)
660            }
661            ArrayPattern::Length(interval) => {
662                write!(f, "[{}]", interval)
663            }
664        }
665    }
666}