dcbor_pattern/pattern/structure/array_pattern/
mod.rs

1use std::ops::RangeBounds;
2
3use dcbor::prelude::*;
4
5use crate::{
6    Interval,
7    pattern::{
8        Matcher, MetaPattern, Path, Pattern,
9        meta::{RepeatPattern, SequencePattern},
10        vm::Instr,
11    },
12};
13
14mod assigner;
15mod backtrack;
16mod helpers;
17
18use assigner::SequenceAssigner;
19use helpers::*;
20
21/// Pattern for matching CBOR array structures.
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub enum ArrayPattern {
24    /// Matches any array.
25    Any,
26    /// Matches arrays with elements that match the given pattern.
27    Elements(Box<Pattern>),
28    /// Matches arrays with length in the given interval.
29    Length(Interval),
30}
31
32impl ArrayPattern {
33    /// Creates a new `ArrayPattern` that matches any array.
34    pub fn any() -> Self { ArrayPattern::Any }
35
36    /// Creates a new `ArrayPattern` that matches arrays with elements
37    /// that match the given pattern.
38    pub fn with_elements(pattern: Pattern) -> Self {
39        ArrayPattern::Elements(Box::new(pattern))
40    }
41
42    pub fn with_length_range<R: RangeBounds<usize>>(range: R) -> Self {
43        ArrayPattern::Length(Interval::new(range))
44    }
45
46    /// Creates a new `ArrayPattern` that matches arrays with length in the
47    /// given range.
48    pub fn with_length_interval(interval: Interval) -> Self {
49        ArrayPattern::Length(interval)
50    }
51
52    /// Match a complex sequence against array elements using VM-based matching.
53    /// This handles patterns with repeats and other complex constructs that
54    /// require backtracking and proper quantifier evaluation.
55    fn match_complex_sequence(
56        &self,
57        cbor: &CBOR,
58        pattern: &Pattern,
59    ) -> Vec<Path> {
60        // For complex sequences containing repeats, we need to check if the
61        // pattern can match the array elements in sequence.
62        // The key insight is that a sequence pattern like
63        // `(*)*, 42, (*)*`  should match if there's any way to
64        // arrange the array elements to satisfy the sequence
65        // requirements.
66
67        match cbor.as_case() {
68            CBORCase::Array(arr) => {
69                // Create a synthetic "element sequence" CBOR value to match
70                // against This represents the array elements as
71                // a sequence that the pattern can evaluate
72
73                // For sequences with repeats, we need to check if the pattern
74                // can be satisfied by the array elements in order
75                let can_match =
76                    self.can_match_sequence_against_array(pattern, arr);
77                let result = if can_match {
78                    vec![vec![cbor.clone()]]
79                } else {
80                    vec![]
81                };
82                result
83            }
84            _ => {
85                vec![] // Not an array
86            }
87        }
88    }
89
90    /// Check if a sequence pattern can match against array elements.
91    /// This implements the core logic for matching patterns like
92    /// `(*)*, 42, (*)*` against array elements.
93    fn can_match_sequence_against_array(
94        &self,
95        pattern: &Pattern,
96        arr: &[CBOR],
97    ) -> bool {
98        match pattern {
99            Pattern::Meta(MetaPattern::Sequence(seq_pattern)) => {
100                self.match_sequence_patterns_against_array(seq_pattern, arr)
101            }
102            Pattern::Meta(MetaPattern::Repeat(repeat_pattern)) => {
103                // Single repeat pattern: check if it can consume all array
104                // elements
105                self.match_repeat_pattern_against_array(repeat_pattern, arr)
106            }
107            _ => {
108                // For non-sequence patterns, fall back to simple matching
109                let array_cbor = arr.to_cbor();
110                pattern.matches(&array_cbor)
111            }
112        }
113    }
114
115    /// Match a sequence of patterns against array elements.
116    /// This is the core algorithm for handling sequences with repeats.
117    fn match_sequence_patterns_against_array(
118        &self,
119        seq_pattern: &SequencePattern,
120        arr: &[CBOR],
121    ) -> bool {
122        let patterns = seq_pattern.patterns();
123        let assigner = SequenceAssigner::new(patterns, arr);
124        assigner.can_match()
125    }
126
127    /// Match a single repeat pattern against array elements.
128    fn match_repeat_pattern_against_array(
129        &self,
130        repeat_pattern: &RepeatPattern,
131        arr: &[CBOR],
132    ) -> bool {
133        let quantifier = repeat_pattern.quantifier();
134        let min_count = quantifier.min();
135        let max_count = quantifier.max().unwrap_or(arr.len());
136
137        // Check if the array length is within the valid range for this repeat
138        if arr.len() < min_count || arr.len() > max_count {
139            return false;
140        }
141
142        // Check if all elements match the repeated pattern
143        arr.iter()
144            .all(|element| repeat_pattern.pattern().matches(element))
145    }
146
147    /// Handle sequence patterns with captures by manually matching elements
148    /// and collecting captures with proper array context.
149    fn handle_sequence_captures(
150        &self,
151        seq_pattern: &SequencePattern,
152        array_cbor: &CBOR,
153        arr: &[CBOR],
154    ) -> (Vec<Path>, std::collections::HashMap<String, Vec<Path>>) {
155        // Use the existing sequence matching logic to find element assignments
156        if let Some(assignments) =
157            self.find_sequence_element_assignments(seq_pattern, arr)
158        {
159            let mut all_captures = std::collections::HashMap::new();
160
161            // Process each pattern and its assigned elements
162            for (pattern_idx, pattern) in
163                seq_pattern.patterns().iter().enumerate()
164            {
165                // Check if this is a capture pattern containing a repeat
166                // pattern
167                if let Pattern::Meta(crate::pattern::MetaPattern::Capture(
168                    capture_pattern,
169                )) = pattern
170                {
171                    // Check if the capture contains a repeat pattern
172                    if extract_capture_with_repeat(pattern).is_some() {
173                        // This is a capture pattern with a repeat (like
174                        // @rest((*)*)) We need to
175                        // capture the sub-array of matched elements
176                        let captured_elements: Vec<CBOR> = assignments
177                            .iter()
178                            .filter_map(|&(p_idx, e_idx)| {
179                                if p_idx == pattern_idx {
180                                    Some(arr[e_idx].clone())
181                                } else {
182                                    None
183                                }
184                            })
185                            .collect();
186
187                        // Create a sub-array from the captured elements
188                        let sub_array = captured_elements.to_cbor();
189
190                        // For capture patterns, we directly capture the
191                        // sub-array with the capture name
192                        let capture_name = capture_pattern.name().to_string();
193                        let array_context_path =
194                            build_simple_array_context_path(
195                                array_cbor, &sub_array,
196                            );
197
198                        all_captures
199                            .entry(capture_name.clone())
200                            .or_insert_with(Vec::new)
201                            .push(array_context_path);
202
203                        continue;
204                    }
205                }
206                // Check if this is a direct repeat pattern that might capture
207                // multiple elements
208                else if is_repeat_pattern(pattern) {
209                    if let Pattern::Meta(crate::pattern::MetaPattern::Repeat(
210                        repeat_pattern,
211                    )) = pattern
212                    {
213                        // For repeat patterns, check if they have captures
214                        let mut repeat_capture_names = Vec::new();
215                        repeat_pattern
216                            .collect_capture_names(&mut repeat_capture_names);
217
218                        if !repeat_capture_names.is_empty() {
219                            // This is a repeat pattern with captures (like
220                            // @rest((*)*))
221                            // We need to capture the sub-array of matched
222                            // elements
223                            let captured_elements: Vec<CBOR> = assignments
224                                .iter()
225                                .filter_map(|&(p_idx, e_idx)| {
226                                    if p_idx == pattern_idx {
227                                        Some(arr[e_idx].clone())
228                                    } else {
229                                        None
230                                    }
231                                })
232                                .collect();
233
234                            // Create a sub-array from the captured elements
235                            let sub_array = captured_elements.to_cbor();
236
237                            // Get captures from the repeat pattern against the
238                            // sub-array
239                            let (_sub_paths, sub_captures) =
240                                repeat_pattern.paths_with_captures(&sub_array);
241
242                            // Transform captures to include array context
243                            transform_captures_with_array_context(
244                                array_cbor,
245                                &sub_array,
246                                sub_captures,
247                                &mut all_captures,
248                            );
249                            continue;
250                        }
251                    }
252                }
253
254                // For non-repeat patterns or repeat patterns without captures,
255                // process each assigned element individually
256                for element_idx in
257                    assignments.iter().filter_map(|&(p_idx, e_idx)| {
258                        if p_idx == pattern_idx {
259                            Some(e_idx)
260                        } else {
261                            None
262                        }
263                    })
264                {
265                    let element = &arr[element_idx];
266
267                    // Get captures from this pattern matching this element
268                    let (_element_paths, element_captures) =
269                        pattern.paths_with_captures(element);
270
271                    // Transform captures to include array context
272                    transform_captures_with_array_context(
273                        array_cbor,
274                        element,
275                        element_captures,
276                        &mut all_captures,
277                    );
278                }
279            }
280
281            // Return the array path and all captures
282            (vec![vec![array_cbor.clone()]], all_captures)
283        } else {
284            // Sequence doesn't match the array
285            (vec![], std::collections::HashMap::new())
286        }
287    }
288
289    /// Find which array elements are assigned to which sequence patterns.
290    /// Returns a vector of (pattern_index, element_index) pairs if the sequence
291    /// matches.
292    fn find_sequence_element_assignments(
293        &self,
294        seq_pattern: &SequencePattern,
295        arr: &[CBOR],
296    ) -> Option<Vec<(usize, usize)>> {
297        let patterns = seq_pattern.patterns();
298        let assigner = SequenceAssigner::new(patterns, arr);
299        assigner.find_assignments()
300    }
301}
302
303impl Matcher for ArrayPattern {
304    fn paths(&self, haystack: &CBOR) -> Vec<Path> {
305        // First check if this is an array
306        match haystack.as_case() {
307            CBORCase::Array(arr) => {
308                match self {
309                    ArrayPattern::Any => {
310                        // Match any array - return the array itself
311                        vec![vec![haystack.clone()]]
312                    }
313                    ArrayPattern::Elements(pattern) => {
314                        // For unified syntax, the pattern should match against
315                        // the array elements
316                        // as a sequence, not against any individual element.
317                        //
318                        // Examples:
319                        // - [42] should match [42] but not [1, 42, 3]
320                        // - ["a" , "b"] should match ["a", "b"] but not ["a",
321                        //   "x", "b"]
322
323                        // Check if this is a simple single-element case
324                        use crate::pattern::{MetaPattern, Pattern};
325
326                        match pattern.as_ref() {
327                            // Simple case: single pattern should match array
328                            // with exactly one element
329                            Pattern::Value(_)
330                            | Pattern::Structure(_)
331                            | Pattern::Meta(MetaPattern::Any(_)) => {
332                                if arr.len() == 1 {
333                                    if pattern.matches(&arr[0]) {
334                                        vec![vec![haystack.clone()]]
335                                    } else {
336                                        vec![]
337                                    }
338                                } else {
339                                    vec![]
340                                }
341                            }
342
343                            // Complex case: sequences, repeats, etc.
344                            Pattern::Meta(MetaPattern::Sequence(
345                                seq_pattern,
346                            )) => {
347                                let patterns = seq_pattern.patterns();
348
349                                // Check if this sequence contains any repeat
350                                // patterns that require VM-based matching
351                                let has_repeat_patterns =
352                                    has_repeat_patterns_in_slice(patterns);
353
354                                if has_repeat_patterns {
355                                    // Use VM-based matching for complex
356                                    // sequences
357                                    let result = self.match_complex_sequence(
358                                        haystack, pattern,
359                                    );
360                                    result
361                                } else {
362                                    // Simple sequence: match each pattern
363                                    // against consecutive elements
364                                    if patterns.len() == arr.len() {
365                                        // Check if each pattern matches the
366                                        // corresponding array element
367                                        for (i, element_pattern) in
368                                            patterns.iter().enumerate()
369                                        {
370                                            if !element_pattern.matches(&arr[i])
371                                            {
372                                                return vec![];
373                                            }
374                                        }
375                                        vec![vec![haystack.clone()]]
376                                    } else {
377                                        vec![]
378                                    }
379                                }
380                            }
381
382                            // For individual repeat patterns
383                            Pattern::Meta(MetaPattern::Repeat(_)) => {
384                                // Use VM-based matching for repeat patterns
385                                self.match_complex_sequence(haystack, pattern)
386                            }
387
388                            // For other meta patterns, handle them properly
389                            Pattern::Meta(MetaPattern::Capture(
390                                capture_pattern,
391                            )) => {
392                                // Capture patterns should search within array
393                                // elements
394                                // (This is different from non-capture patterns)
395                                let has_matching_element =
396                                    arr.iter().any(|element| {
397                                        capture_pattern
398                                            .pattern()
399                                            .matches(element)
400                                    });
401
402                                if has_matching_element {
403                                    vec![vec![haystack.clone()]]
404                                } else {
405                                    vec![]
406                                }
407                            }
408
409                            // For other meta patterns (or, and, etc.), use the
410                            // old heuristic
411                            // This handles cases like `[(number | text)]`
412                            _ => {
413                                // Check if the pattern matches the array as a
414                                // whole sequence
415                                // For now, use a heuristic: if it's a simple
416                                // meta pattern,
417                                // apply it to each element and require at least
418                                // one match
419                                // This is not perfect but maintains some
420                                // compatibility
421                                let mut result = Vec::new();
422                                for element in arr {
423                                    if pattern.matches(element) {
424                                        result.push(vec![haystack.clone()]);
425                                        break;
426                                    }
427                                }
428                                result
429                            }
430                        }
431                    }
432                    ArrayPattern::Length(interval) => {
433                        if interval.contains(arr.len()) {
434                            vec![vec![haystack.clone()]]
435                        } else {
436                            vec![]
437                        }
438                    }
439                }
440            }
441            _ => {
442                // Not an array, no match
443                vec![]
444            }
445        }
446    }
447
448    fn compile(
449        &self,
450        code: &mut Vec<Instr>,
451        literals: &mut Vec<Pattern>,
452        captures: &mut Vec<String>,
453    ) {
454        // Collect capture names from inner patterns
455        self.collect_capture_names(captures);
456
457        // Check if this pattern has captures
458        let mut capture_names = Vec::new();
459        self.collect_capture_names(&mut capture_names);
460
461        if capture_names.is_empty() {
462            // No captures, use the simple MatchStructure approach
463            let idx = literals.len();
464            literals.push(Pattern::Structure(
465                crate::pattern::StructurePattern::Array(self.clone()),
466            ));
467            code.push(Instr::MatchStructure(idx));
468        } else {
469            // Has captures, compile to VM navigation instructions
470            match self {
471                ArrayPattern::Elements(pattern) => {
472                    // First check that we have an array
473                    let array_check_idx = literals.len();
474                    literals.push(Pattern::Structure(
475                        crate::pattern::StructurePattern::Array(
476                            ArrayPattern::Any,
477                        ),
478                    ));
479                    code.push(Instr::MatchStructure(array_check_idx));
480
481                    // Navigate to array elements
482                    code.push(Instr::PushAxis(
483                        crate::pattern::vm::Axis::ArrayElement,
484                    ));
485
486                    // Compile the inner pattern with captures
487                    pattern.compile(code, literals, captures);
488
489                    // Pop back to array level
490                    code.push(Instr::Pop);
491                }
492                _ => {
493                    // Other array patterns (length-based) don't support
494                    // captures in this context Fall back to
495                    // MatchStructure
496                    let idx = literals.len();
497                    literals.push(Pattern::Structure(
498                        crate::pattern::StructurePattern::Array(self.clone()),
499                    ));
500                    code.push(Instr::MatchStructure(idx));
501                }
502            }
503        }
504    }
505
506    fn collect_capture_names(&self, names: &mut Vec<String>) {
507        match self {
508            ArrayPattern::Any => {
509                // No captures in a simple any pattern
510            }
511            ArrayPattern::Elements(pattern) => {
512                // Collect captures from the element pattern
513                pattern.collect_capture_names(names);
514            }
515            ArrayPattern::Length(_) => {
516                // No captures in length range patterns
517            }
518        }
519    }
520
521    fn paths_with_captures(
522        &self,
523        cbor: &CBOR,
524    ) -> (Vec<Path>, std::collections::HashMap<String, Vec<Path>>) {
525        // For simple cases that never have captures, use the fast path
526        match self {
527            ArrayPattern::Any | ArrayPattern::Length(_) => {
528                return (self.paths(cbor), std::collections::HashMap::new());
529            }
530            ArrayPattern::Elements(pattern) => {
531                // Check if this specific pattern has any captures
532                let mut capture_names = Vec::new();
533                pattern.collect_capture_names(&mut capture_names);
534
535                if capture_names.is_empty() {
536                    // No captures in the element pattern, use the fast path
537                    return (
538                        self.paths(cbor),
539                        std::collections::HashMap::new(),
540                    );
541                }
542
543                // Has captures, continue with complex logic below
544            }
545        }
546
547        match cbor.as_case() {
548            CBORCase::Array(_arr) => {
549                if let ArrayPattern::Elements(pattern) = self {
550                    // First check if this array pattern matches at all
551                    if self.paths(cbor).is_empty() {
552                        return (vec![], std::collections::HashMap::new());
553                    }
554
555                    // For patterns with captures, we need special handling
556                    // depending on the inner pattern type
557                    match pattern.as_ref() {
558                        Pattern::Meta(
559                            crate::pattern::MetaPattern::Sequence(seq_pattern),
560                        ) => {
561                            // Special handling for SequencePattern with
562                            // captures
563                            self.handle_sequence_captures(
564                                seq_pattern,
565                                cbor,
566                                _arr,
567                            )
568                        }
569                        Pattern::Meta(
570                            crate::pattern::MetaPattern::Capture(
571                                _capture_pattern,
572                            ),
573                        ) => {
574                            // For capture patterns like [@item(number)] or
575                            // [@item(42)],
576                            // use the VM approach for consistency with existing
577                            // behavior
578
579                            // Use the VM approach for consistent behavior
580                            let mut code = Vec::new();
581                            let mut literals = Vec::new();
582                            let mut captures_list = Vec::new();
583
584                            // Compile the entire ArrayPattern (not just the
585                            // inner pattern)
586                            let array_pattern = Pattern::Structure(
587                                crate::pattern::StructurePattern::Array(
588                                    self.clone(),
589                                ),
590                            );
591                            array_pattern.compile(
592                                &mut code,
593                                &mut literals,
594                                &mut captures_list,
595                            );
596                            code.push(crate::pattern::vm::Instr::Accept);
597
598                            let program = crate::pattern::vm::Program {
599                                code,
600                                literals,
601                                capture_names: captures_list,
602                            };
603
604                            // Run the VM program against the CBOR
605                            crate::pattern::vm::run(&program, cbor)
606                        }
607                        _ => {
608                            // For non-sequence patterns, use the original VM
609                            // approach
610                            // but start with the main Pattern's VM compilation
611                            // for better compatibility
612                            let mut code = Vec::new();
613                            let mut literals = Vec::new();
614                            let mut captures = Vec::new();
615
616                            // Compile the entire ArrayPattern (not just the
617                            // inner pattern)
618                            let array_pattern = Pattern::Structure(
619                                crate::pattern::StructurePattern::Array(
620                                    self.clone(),
621                                ),
622                            );
623                            array_pattern.compile(
624                                &mut code,
625                                &mut literals,
626                                &mut captures,
627                            );
628                            code.push(crate::pattern::vm::Instr::Accept);
629
630                            let program = crate::pattern::vm::Program {
631                                code,
632                                literals,
633                                capture_names: captures,
634                            };
635
636                            // Run the VM program against the CBOR
637                            crate::pattern::vm::run(&program, cbor)
638                        }
639                    }
640                } else {
641                    // Other array patterns (length-based) don't have inner
642                    // patterns with captures
643                    (self.paths(cbor), std::collections::HashMap::new())
644                }
645            }
646            _ => {
647                // Not an array, no match
648                (vec![], std::collections::HashMap::new())
649            }
650        }
651    }
652}
653
654impl std::fmt::Display for ArrayPattern {
655    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
656        match self {
657            ArrayPattern::Any => write!(f, "array"),
658            ArrayPattern::Elements(pattern) => {
659                let formatted_pattern = format_array_element_pattern(pattern);
660                write!(f, "[{}]", formatted_pattern)
661            }
662            ArrayPattern::Length(interval) => {
663                write!(f, "[{}]", interval)
664            }
665        }
666    }
667}