Skip to main content

bc_envelope_pattern/pattern/
mod.rs

1// Pattern module - provides pattern matching functionality for envelopes
2mod matcher;
3mod vm;
4
5// Subdirectory modules
6mod leaf;
7mod meta;
8mod structure;
9
10// Integration modules
11pub mod dcbor_integration;
12
13// Re-export all types
14use std::{
15    cell::RefCell,
16    collections::HashMap,
17    ops::{RangeBounds, RangeInclusive},
18};
19
20use bc_envelope::prelude::*;
21use known_values::KnownValue;
22pub use matcher::{Matcher, Path, compile_as_atomic};
23
24use self::{
25    leaf::{
26        ArrayPattern, BoolPattern, ByteStringPattern, DatePattern,
27        KnownValuePattern, LeafPattern, MapPattern, NullPattern, NumberPattern,
28        TextPattern,
29    },
30    meta::{
31        AndPattern, AnyPattern, CapturePattern, GroupPattern, MetaPattern,
32        NotPattern, OrPattern, SearchPattern, TraversePattern,
33    },
34    structure::{
35        AssertionsPattern, DigestPattern, LeafStructurePattern, NodePattern,
36        ObjectPattern, ObscuredPattern, PredicatePattern, StructurePattern,
37        SubjectPattern, WrappedPattern,
38    },
39};
40use crate::{
41    DCBORPattern, Quantifier, Reluctance,
42    pattern::{leaf::CBORPattern, vm::Instr},
43};
44
45/// The main pattern type used for matching envelopes.
46#[derive(Debug, Clone, Hash, PartialEq, Eq)]
47pub enum Pattern {
48    /// Leaf patterns for matching CBOR values.
49    Leaf(LeafPattern),
50
51    /// Structure patterns for matching envelope elements.
52    Structure(StructurePattern),
53
54    /// Meta-patterns for combining and modifying other patterns.
55    Meta(MetaPattern),
56}
57
58impl Matcher for Pattern {
59    fn paths_with_captures(
60        &self,
61        haystack: &Envelope,
62    ) -> (Vec<Path>, HashMap<String, Vec<Path>>) {
63        let results = self.vm_run(haystack);
64        let mut paths = Vec::new();
65        let mut captures: HashMap<String, Vec<Path>> = HashMap::new();
66        for (p, caps) in results {
67            paths.push(p);
68            for (name, mut vals) in caps {
69                captures.entry(name).or_default().append(&mut vals);
70            }
71        }
72        (paths, captures)
73    }
74
75    fn is_complex(&self) -> bool {
76        match self {
77            Pattern::Leaf(leaf) => leaf.is_complex(),
78            Pattern::Structure(structure) => structure.is_complex(),
79            Pattern::Meta(meta) => meta.is_complex(),
80        }
81    }
82}
83
84// region: Leaf Patterns
85//
86//
87
88impl Pattern {
89    /// Creates a new `Pattern` that matches any CBOR value.
90    pub fn any_cbor() -> Self {
91        Pattern::Leaf(LeafPattern::Cbor(CBORPattern::any()))
92    }
93
94    /// Creates a new `Pattern` that matches a specific CBOR value.
95    pub fn cbor(cbor: impl CBOREncodable) -> Self {
96        Pattern::Leaf(LeafPattern::Cbor(CBORPattern::value(cbor)))
97    }
98
99    /// Creates a new `Pattern` that matches CBOR values using dcbor-pattern
100    /// expressions.
101    pub fn cbor_pattern(pattern: DCBORPattern) -> Self {
102        Pattern::Leaf(LeafPattern::Cbor(CBORPattern::pattern(pattern)))
103    }
104}
105
106impl Pattern {
107    /// Creates a new `Pattern` that matches any boolean value.
108    pub fn any_bool() -> Self {
109        Pattern::Leaf(LeafPattern::Bool(BoolPattern::any()))
110    }
111
112    /// Creates a new `Pattern` that matches a specific boolean value.
113    pub fn bool(b: bool) -> Self {
114        Pattern::Leaf(LeafPattern::Bool(BoolPattern::value(b)))
115    }
116}
117
118impl Pattern {
119    /// Creates a new `Pattern` that matches any text value.
120    pub fn any_text() -> Self {
121        Pattern::Leaf(LeafPattern::Text(TextPattern::any()))
122    }
123
124    /// Creates a new `Pattern` that matches a specific text value.
125    pub fn text<T: Into<String>>(value: T) -> Self {
126        Pattern::Leaf(LeafPattern::Text(TextPattern::value(value)))
127    }
128
129    /// Creates a new `Pattern` that matches text values that match the given
130    /// regular expression.
131    pub fn text_regex(regex: regex::Regex) -> Self {
132        Pattern::Leaf(LeafPattern::Text(TextPattern::regex(regex)))
133    }
134}
135
136impl Pattern {
137    /// Creates a new `Pattern` that matches any Date (CBOR tag 1) value.
138    pub fn any_date() -> Self {
139        Pattern::Leaf(LeafPattern::Date(DatePattern::any()))
140    }
141
142    /// Creates a new `Pattern` that matches a specific Date (CBOR tag 1) value.
143    pub fn date(date: Date) -> Self {
144        Pattern::Leaf(LeafPattern::Date(DatePattern::value(date)))
145    }
146
147    /// Creates a new `Pattern` that matches Date (CBOR tag 1) values within a
148    /// specified range (inclusive).
149    pub fn date_range(range: RangeInclusive<Date>) -> Self {
150        Pattern::Leaf(LeafPattern::Date(DatePattern::range(range)))
151    }
152
153    /// Creates a new `Pattern` that matches Date (CBOR tag 1) values that are
154    /// on or after the specified date.
155    pub fn date_earliest(date: Date) -> Self {
156        Pattern::Leaf(LeafPattern::Date(DatePattern::earliest(date)))
157    }
158
159    /// Creates a new `Pattern` that matches Date (CBOR tag 1) values that are
160    /// on or before the specified date.
161    pub fn date_latest(date: Date) -> Self {
162        Pattern::Leaf(LeafPattern::Date(DatePattern::latest(date)))
163    }
164
165    /// Creates a new `Pattern` that matches Date (CBOR tag 1) values by their
166    /// ISO-8601 string representation.
167    pub fn date_iso8601(iso_string: impl Into<String>) -> Self {
168        Pattern::Leaf(LeafPattern::Date(DatePattern::string(iso_string)))
169    }
170
171    /// Creates a new `Pattern` that matches Date (CBOR tag 1) values whose
172    /// ISO-8601 string representation matches the given regular expression.
173    pub fn date_regex(regex: regex::Regex) -> Self {
174        Pattern::Leaf(LeafPattern::Date(DatePattern::regex(regex)))
175    }
176}
177
178impl Pattern {
179    /// Creates a new `Pattern` that matches any number value.
180    pub fn any_number() -> Self {
181        Pattern::Leaf(LeafPattern::Number(NumberPattern::any()))
182    }
183
184    /// Creates a new `Pattern` that matches a specific number value.
185    pub fn number<T: Into<f64>>(value: T) -> Self {
186        Pattern::Leaf(LeafPattern::Number(NumberPattern::exact(value)))
187    }
188
189    /// Creates a new `Pattern` that matches number values within a specified
190    /// range (inclusive).
191    pub fn number_range<A: Into<f64> + Copy>(range: RangeInclusive<A>) -> Self {
192        Pattern::Leaf(LeafPattern::Number(NumberPattern::range(range)))
193    }
194
195    /// Creates a new `Pattern` that matches number values that are greater than
196    /// the specified value.
197    pub fn number_greater_than<T: Into<f64>>(value: T) -> Self {
198        Pattern::Leaf(LeafPattern::Number(NumberPattern::greater_than(value)))
199    }
200
201    /// Creates a new `Pattern` that matches number values that are greater than
202    /// or equal to the specified value.
203    pub fn number_greater_than_or_equal<T: Into<f64>>(value: T) -> Self {
204        Pattern::Leaf(LeafPattern::Number(
205            NumberPattern::greater_than_or_equal(value),
206        ))
207    }
208
209    /// Creates a new `Pattern` that matches number values that are less than
210    /// the specified value.
211    pub fn number_less_than<T: Into<f64>>(value: T) -> Self {
212        Pattern::Leaf(LeafPattern::Number(NumberPattern::less_than(value)))
213    }
214
215    /// Creates a new `Pattern` that matches number values that are less than or
216    /// equal to the specified value.
217    pub fn number_less_than_or_equal<T: Into<f64>>(value: T) -> Self {
218        Pattern::Leaf(LeafPattern::Number(NumberPattern::less_than_or_equal(
219            value,
220        )))
221    }
222
223    /// Creates a new `Pattern` that matches number values that are NaN (Not a
224    /// Number).
225    pub fn number_nan() -> Self {
226        Pattern::Leaf(LeafPattern::Number(NumberPattern::nan()))
227    }
228}
229
230impl Pattern {
231    /// Creates a new `Pattern` that matches any byte string value.
232    pub fn any_byte_string() -> Self {
233        Pattern::Leaf(LeafPattern::ByteString(ByteStringPattern::any()))
234    }
235
236    /// Creates a new `Pattern` that matches a specific byte string value.
237    pub fn byte_string(value: impl AsRef<[u8]>) -> Self {
238        Pattern::Leaf(LeafPattern::ByteString(ByteStringPattern::value(value)))
239    }
240
241    /// Creates a new `Pattern` that matches byte string values that match the
242    /// given binary regular expression.
243    pub fn byte_string_binary_regex(regex: regex::bytes::Regex) -> Self {
244        Pattern::Leaf(LeafPattern::ByteString(ByteStringPattern::regex(regex)))
245    }
246}
247
248impl Pattern {
249    pub fn any_known_value() -> Self {
250        Pattern::Leaf(LeafPattern::KnownValue(KnownValuePattern::any()))
251    }
252
253    pub fn known_value(value: KnownValue) -> Self {
254        Pattern::Leaf(LeafPattern::KnownValue(KnownValuePattern::value(value)))
255    }
256
257    pub fn known_value_named<T: Into<String>>(name: T) -> Self {
258        Pattern::Leaf(LeafPattern::KnownValue(KnownValuePattern::named(name)))
259    }
260
261    pub fn known_value_regex(regex: regex::Regex) -> Self {
262        Pattern::Leaf(LeafPattern::KnownValue(KnownValuePattern::regex(regex)))
263    }
264
265    pub fn unit() -> Self { Self::known_value(known_values::UNIT) }
266}
267
268impl Pattern {
269    pub fn any_array() -> Self {
270        Pattern::Leaf(LeafPattern::Array(ArrayPattern::any()))
271    }
272
273    pub fn array_with_range(interval: impl RangeBounds<usize>) -> Self {
274        Pattern::Leaf(LeafPattern::Array(ArrayPattern::interval(interval)))
275    }
276
277    pub fn array_with_count(count: usize) -> Self {
278        Pattern::Leaf(LeafPattern::Array(ArrayPattern::count(count)))
279    }
280
281    /// Creates an array pattern from a dcbor-pattern.
282    /// This is used internally by the parser to delegate to dcbor-pattern.
283    pub fn array_from_dcbor_pattern(pattern: DCBORPattern) -> Self {
284        Pattern::Leaf(LeafPattern::Array(ArrayPattern::from_dcbor_pattern(
285            pattern,
286        )))
287    }
288}
289
290impl Pattern {
291    pub fn any_map() -> Self {
292        Pattern::Leaf(LeafPattern::Map(MapPattern::any()))
293    }
294
295    pub fn map_with_range(interval: impl RangeBounds<usize>) -> Self {
296        Pattern::Leaf(LeafPattern::Map(MapPattern::interval(interval)))
297    }
298
299    pub fn map_with_count(count: usize) -> Self {
300        Pattern::Leaf(LeafPattern::Map(MapPattern::interval(count..=count)))
301    }
302}
303
304impl Pattern {
305    pub fn null() -> Self { Pattern::Leaf(LeafPattern::Null(NullPattern)) }
306}
307
308impl Pattern {
309    /// Creates a new `Pattern` that matches any tagged value.
310    /// This is a proxy to dcbor-pattern's tagged functionality.
311    pub fn any_tag() -> Self {
312        Pattern::Leaf(crate::pattern::leaf::LeafPattern::Tag(
313            crate::pattern::leaf::TaggedPattern::any(),
314        ))
315    }
316
317    /// Creates a new `Pattern` that matches a specific tagged value with any
318    /// content. This is a proxy to dcbor-pattern's tagged functionality.
319    pub fn tagged(tag: impl Into<Tag>, pattern: DCBORPattern) -> Self {
320        Pattern::Leaf(crate::pattern::leaf::LeafPattern::Tag(
321            crate::pattern::leaf::TaggedPattern::with_tag(tag, pattern),
322        ))
323    }
324
325    /// Creates a new `Pattern` that matches a tagged value with specific tag
326    /// name and any content. This is a proxy to dcbor-pattern's tagged
327    /// functionality.
328    pub fn tagged_name(name: impl Into<String>, pattern: DCBORPattern) -> Self {
329        Pattern::Leaf(crate::pattern::leaf::LeafPattern::Tag(
330            crate::pattern::leaf::TaggedPattern::with_name(
331                name.into(),
332                pattern,
333            ),
334        ))
335    }
336
337    /// Creates a new `Pattern` that matches a tagged value with tag name
338    /// matching regex and any content. This is a proxy to dcbor-pattern's
339    /// tagged functionality.
340    pub fn tagged_regex(regex: regex::Regex, pattern: DCBORPattern) -> Self {
341        Pattern::Leaf(crate::pattern::leaf::LeafPattern::Tag(
342            crate::pattern::leaf::TaggedPattern::with_regex(regex, pattern),
343        ))
344    }
345
346    /// Creates a new `Pattern` that matches a tagged value from a
347    /// dcbor_pattern::TaggedPattern. This is an internal helper for the
348    /// parser.
349    pub(crate) fn tagged_from_dcbor_pattern(
350        tagged_pattern: dcbor_pattern::TaggedPattern,
351    ) -> Self {
352        Pattern::Leaf(crate::pattern::leaf::LeafPattern::Tag(
353            crate::pattern::leaf::TaggedPattern::from_dcbor_pattern(
354                tagged_pattern,
355            ),
356        ))
357    }
358}
359
360//
361//
362// endregion
363
364// region: Structure Patterns
365//
366//
367
368impl Pattern {
369    pub fn leaf() -> Self {
370        Pattern::Structure(StructurePattern::Leaf(LeafStructurePattern::new()))
371    }
372
373    pub fn any_assertion() -> Self {
374        Pattern::Structure(StructurePattern::Assertions(
375            AssertionsPattern::any(),
376        ))
377    }
378
379    pub fn assertion_with_predicate(pattern: Pattern) -> Self {
380        Pattern::Structure(StructurePattern::Assertions(
381            AssertionsPattern::with_predicate(pattern),
382        ))
383    }
384
385    pub fn assertion_with_object(pattern: Pattern) -> Self {
386        Pattern::Structure(StructurePattern::Assertions(
387            AssertionsPattern::with_object(pattern),
388        ))
389    }
390}
391
392impl Pattern {
393    pub fn any_subject() -> Self {
394        Pattern::Structure(StructurePattern::Subject(SubjectPattern::any()))
395    }
396
397    pub fn subject(pattern: Pattern) -> Self {
398        Pattern::Structure(StructurePattern::Subject(SubjectPattern::pattern(
399            pattern,
400        )))
401    }
402}
403
404impl Pattern {
405    pub fn any_predicate() -> Self {
406        Pattern::Structure(StructurePattern::Predicate(PredicatePattern::any()))
407    }
408
409    pub fn predicate(pattern: Pattern) -> Self {
410        Pattern::Structure(StructurePattern::Predicate(
411            PredicatePattern::pattern(pattern),
412        ))
413    }
414
415    pub fn any_object() -> Self {
416        Pattern::Structure(StructurePattern::Object(ObjectPattern::any()))
417    }
418
419    pub fn object(pattern: Pattern) -> Self {
420        Pattern::Structure(StructurePattern::Object(ObjectPattern::pattern(
421            pattern,
422        )))
423    }
424}
425
426impl Pattern {
427    pub fn digest(digest: bc_components::Digest) -> Self {
428        Pattern::Structure(StructurePattern::Digest(DigestPattern::digest(
429            digest,
430        )))
431    }
432
433    pub fn digest_prefix(prefix: impl AsRef<[u8]>) -> Self {
434        Pattern::Structure(StructurePattern::Digest(DigestPattern::prefix(
435            prefix,
436        )))
437    }
438
439    pub fn digest_binary_regex(regex: regex::bytes::Regex) -> Self {
440        Pattern::Structure(StructurePattern::Digest(
441            DigestPattern::binary_regex(regex),
442        ))
443    }
444
445    pub fn any_node() -> Self {
446        Pattern::Structure(StructurePattern::Node(NodePattern::any()))
447    }
448
449    pub fn node_with_assertions_range(range: impl RangeBounds<usize>) -> Self {
450        Pattern::Structure(StructurePattern::Node(NodePattern::interval(range)))
451    }
452
453    pub fn node_with_assertions_count(count: usize) -> Self {
454        Pattern::Structure(StructurePattern::Node(NodePattern::interval(
455            count..=count,
456        )))
457    }
458
459    pub fn obscured() -> Self {
460        Pattern::Structure(StructurePattern::Obscured(ObscuredPattern::any()))
461    }
462
463    pub fn elided() -> Self {
464        Pattern::Structure(
465            StructurePattern::Obscured(ObscuredPattern::elided()),
466        )
467    }
468
469    pub fn encrypted() -> Self {
470        Pattern::Structure(StructurePattern::Obscured(
471            ObscuredPattern::encrypted(),
472        ))
473    }
474
475    pub fn compressed() -> Self {
476        Pattern::Structure(StructurePattern::Obscured(
477            ObscuredPattern::compressed(),
478        ))
479    }
480}
481
482//
483//
484// endregion
485
486// region: Meta Patterns
487//
488//
489
490impl Pattern {
491    /// Creates a new `Pattern` that matches any element.
492    pub fn any() -> Self { Pattern::Meta(MetaPattern::Any(AnyPattern::new())) }
493}
494
495impl Pattern {
496    /// Creates a new `Pattern` that only matches if all specified patterns
497    /// match.
498    pub fn and(patterns: Vec<Pattern>) -> Self {
499        Pattern::Meta(MetaPattern::And(AndPattern::new(patterns)))
500    }
501
502    /// Creates a new `Pattern` that matches if at least one of the specified
503    /// patterns matches.
504    pub fn or(patterns: Vec<Pattern>) -> Self {
505        Pattern::Meta(MetaPattern::Or(OrPattern::new(patterns)))
506    }
507}
508
509impl Pattern {
510    /// Creates a new `Pattern` that matches a traversal order of patterns.
511    pub fn traverse(patterns: Vec<Pattern>) -> Self {
512        Pattern::Meta(MetaPattern::Traverse(TraversePattern::new(patterns)))
513    }
514}
515
516impl Pattern {
517    /// Creates a new `Pattern` that searches for a specific pattern within the
518    /// envelope. Useful for finding patterns that may not be at the root
519    /// of the envelope.
520    pub fn search(pattern: Pattern) -> Self {
521        Pattern::Meta(MetaPattern::Search(SearchPattern::new(pattern)))
522    }
523}
524
525impl Pattern {
526    /// Creates a new `Pattern` that negates another pattern; matches if the
527    /// specified pattern does not match.
528    pub fn not_matching(pattern: Pattern) -> Self {
529        Pattern::Meta(MetaPattern::Not(NotPattern::new(pattern)))
530    }
531}
532
533impl Pattern {
534    /// Compile self to byte-code (recursive).
535    pub(crate) fn compile(
536        &self,
537        code: &mut Vec<Instr>,
538        lits: &mut Vec<Pattern>,
539        captures: &mut Vec<String>,
540    ) {
541        use Pattern::*;
542        match self {
543            Leaf(leaf_pattern) => leaf_pattern.compile(code, lits, captures),
544            Structure(struct_pattern) => {
545                struct_pattern.compile(code, lits, captures)
546            }
547            Meta(meta_pattern) => meta_pattern.compile(code, lits, captures),
548        }
549    }
550}
551
552impl Pattern {
553    /// Creates a new `Pattern` that will match a pattern repeated a number of
554    /// times according to the specified range and greediness.
555    ///
556    /// In regex terms:
557    ///
558    /// | Range         | Quantifier   |
559    /// | :------------ | :----------- |
560    /// | `..`          | `*`          |
561    /// | `1..`         | `+`          |
562    /// | `0..=1`       | `?`          |
563    /// | `min..=max`   | `{min,max}`  |
564    /// | `min..`       | `{min,}`     |
565    /// | `..=max`      | `{0,max}`    |
566    /// | `n..=n`       | `{n}`        |
567    pub fn repeat(
568        pattern: Pattern,
569        interval: impl RangeBounds<usize>,
570        reluctance: Reluctance,
571    ) -> Self {
572        Pattern::Meta(MetaPattern::Group(GroupPattern::repeat(
573            pattern,
574            Quantifier::new(interval, reluctance),
575        )))
576    }
577
578    pub fn group(pattern: Pattern) -> Self {
579        Pattern::Meta(MetaPattern::Group(GroupPattern::new(pattern)))
580    }
581}
582
583impl Pattern {
584    /// Creates a new `Pattern` that will capture a pattern match with a name.
585    pub fn capture(name: impl AsRef<str>, pattern: Pattern) -> Self {
586        Pattern::Meta(MetaPattern::Capture(CapturePattern::new(name, pattern)))
587    }
588}
589
590//
591//
592// endregion
593
594impl std::fmt::Display for Pattern {
595    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
596        match self {
597            Pattern::Leaf(leaf) => write!(f, "{}", leaf),
598            Pattern::Structure(structure) => write!(f, "{}", structure),
599            Pattern::Meta(meta) => write!(f, "{}", meta),
600        }
601    }
602}
603
604impl Pattern {
605    /// Internal helper that runs the pattern through the VM and returns the
606    /// matching paths.
607    fn vm_run(
608        &self,
609        env: &Envelope,
610    ) -> Vec<(Path, HashMap<String, Vec<Path>>)> {
611        thread_local! {
612            static PROG: RefCell<HashMap<u64, vm::Program>> = RefCell::new(HashMap::new());
613        }
614
615        // cheap structural hash
616        use std::{
617            collections::hash_map::DefaultHasher,
618            hash::{Hash, Hasher},
619        };
620        let mut h = DefaultHasher::new();
621        self.hash(&mut h);
622        let key = h.finish();
623
624        let prog = PROG
625            .with(|cell| cell.borrow().get(&key).cloned())
626            .unwrap_or_else(|| {
627                let mut p = vm::Program {
628                    code: Vec::new(),
629                    literals: Vec::new(),
630                    capture_names: Vec::new(),
631                };
632                self.compile(
633                    &mut p.code,
634                    &mut p.literals,
635                    &mut p.capture_names,
636                );
637                p.code.push(Instr::Accept);
638                PROG.with(|cell| {
639                    cell.borrow_mut().insert(key, p.clone());
640                });
641                p
642            });
643
644        vm::run(&prog, env)
645    }
646
647    #[allow(dead_code)]
648    fn vm_paths(&self, env: &Envelope) -> Vec<Path> {
649        self.vm_run(env).into_iter().map(|(p, _)| p).collect()
650    }
651
652    pub(crate) fn collect_capture_names(&self, out: &mut Vec<String>) {
653        if let Pattern::Meta(meta) = self {
654            meta.collect_capture_names(out)
655        }
656    }
657}
658
659impl Pattern {
660    /// Creates a new `Pattern` that matches any wrapped envelope without
661    /// descending. Renamed from `wrapped()` to break tests so they can be
662    /// fixed.
663    pub fn wrapped() -> Self {
664        Pattern::Structure(StructurePattern::Wrapped(WrappedPattern::new()))
665    }
666
667    /// Creates a new `Pattern` that matches a wrapped envelope and also matches
668    /// on its unwrapped content.
669    pub fn unwrap_matching(pattern: Pattern) -> Self {
670        Pattern::Structure(StructurePattern::Wrapped(
671            WrappedPattern::unwrap_matching(pattern),
672        ))
673    }
674
675    /// Creates a new `Pattern` that matches any wrapped envelope and descends
676    /// into it.
677    pub fn unwrap() -> Self {
678        Pattern::Structure(StructurePattern::Wrapped(WrappedPattern::unwrap()))
679    }
680}