dcbor_pattern/pattern/
pattern_impl.rs

1use crate::{
2    Error, Result,
3    pattern::{
4        Matcher, Path, meta::MetaPattern, structure::StructurePattern,
5        value::ValuePattern, vm::Instr,
6    },
7};
8
9use dcbor::prelude::*;
10
11#[derive(Debug, Clone, PartialEq, Eq)]
12pub enum Pattern {
13    Value(ValuePattern),
14    Structure(StructurePattern),
15    Meta(MetaPattern),
16}
17
18impl Pattern {
19    /// Creates a pattern that matches any boolean value.
20    pub fn any_bool() -> Self {
21        Pattern::Value(ValuePattern::Bool(
22            crate::pattern::value::BoolPattern::any(),
23        ))
24    }
25
26    /// Creates a pattern that matches a specific boolean value.
27    pub fn bool(value: bool) -> Self {
28        Pattern::Value(ValuePattern::Bool(
29            crate::pattern::value::BoolPattern::value(value),
30        ))
31    }
32
33    /// Creates a pattern that matches any number value.
34    pub fn any_number() -> Self {
35        Pattern::Value(ValuePattern::Number(
36            crate::pattern::value::NumberPattern::any(),
37        ))
38    }
39
40    /// Creates a pattern that matches a specific number value.
41    pub fn number<T>(value: T) -> Self
42    where
43        T: Into<f64>,
44    {
45        Pattern::Value(ValuePattern::Number(
46            crate::pattern::value::NumberPattern::value(value),
47        ))
48    }
49
50    /// Creates a pattern that matches numbers within a range.
51    pub fn number_range<A>(range: std::ops::RangeInclusive<A>) -> Self
52    where
53        A: Into<f64> + Copy,
54    {
55        Pattern::Value(ValuePattern::Number(
56            crate::pattern::value::NumberPattern::range(range),
57        ))
58    }
59
60    /// Creates a pattern that matches numbers greater than the specified value.
61    pub fn number_greater_than<T>(value: T) -> Self
62    where
63        T: Into<f64>,
64    {
65        Pattern::Value(ValuePattern::Number(
66            crate::pattern::value::NumberPattern::greater_than(value),
67        ))
68    }
69
70    /// Creates a pattern that matches numbers greater than or equal to the
71    /// specified value.
72    pub fn number_greater_than_or_equal<T>(value: T) -> Self
73    where
74        T: Into<f64>,
75    {
76        Pattern::Value(ValuePattern::Number(
77            crate::pattern::value::NumberPattern::greater_than_or_equal(value),
78        ))
79    }
80
81    /// Creates a pattern that matches numbers less than the specified value.
82    pub fn number_less_than<T>(value: T) -> Self
83    where
84        T: Into<f64>,
85    {
86        Pattern::Value(ValuePattern::Number(
87            crate::pattern::value::NumberPattern::less_than(value),
88        ))
89    }
90
91    /// Creates a pattern that matches numbers less than or equal to the
92    /// specified value.
93    pub fn number_less_than_or_equal<T>(value: T) -> Self
94    where
95        T: Into<f64>,
96    {
97        Pattern::Value(ValuePattern::Number(
98            crate::pattern::value::NumberPattern::less_than_or_equal(value),
99        ))
100    }
101
102    /// Creates a pattern that matches NaN values.
103    pub fn number_nan() -> Self {
104        Pattern::Value(ValuePattern::Number(
105            crate::pattern::value::NumberPattern::nan(),
106        ))
107    }
108
109    /// Creates a pattern that matches positive infinity values.
110    pub fn number_infinity() -> Self {
111        Pattern::Value(ValuePattern::Number(
112            crate::pattern::value::NumberPattern::infinity(),
113        ))
114    }
115
116    /// Creates a pattern that matches negative infinity values.
117    pub fn number_neg_infinity() -> Self {
118        Pattern::Value(ValuePattern::Number(
119            crate::pattern::value::NumberPattern::neg_infinity(),
120        ))
121    }
122
123    /// Creates a pattern that matches any text value.
124    pub fn any_text() -> Self {
125        Pattern::Value(ValuePattern::Text(
126            crate::pattern::value::TextPattern::any(),
127        ))
128    }
129
130    /// Creates a pattern that matches a specific text value.
131    pub fn text<T: Into<String>>(value: T) -> Self {
132        Pattern::Value(ValuePattern::Text(
133            crate::pattern::value::TextPattern::value(value),
134        ))
135    }
136
137    /// Creates a pattern that matches text using a regex.
138    pub fn text_regex(regex: regex::Regex) -> Self {
139        Pattern::Value(ValuePattern::Text(
140            crate::pattern::value::TextPattern::regex(regex),
141        ))
142    }
143
144    /// Creates a pattern that matches any byte string value.
145    pub fn any_byte_string() -> Self {
146        Pattern::Value(ValuePattern::ByteString(
147            crate::pattern::value::ByteStringPattern::any(),
148        ))
149    }
150
151    /// Creates a pattern that matches a specific byte string value.
152    pub fn byte_string(value: impl AsRef<[u8]>) -> Self {
153        Pattern::Value(ValuePattern::ByteString(
154            crate::pattern::value::ByteStringPattern::value(value),
155        ))
156    }
157
158    /// Creates a pattern that matches byte strings using a binary regex.
159    pub fn byte_string_regex(regex: regex::bytes::Regex) -> Self {
160        Pattern::Value(ValuePattern::ByteString(
161            crate::pattern::value::ByteStringPattern::regex(regex),
162        ))
163    }
164
165    /// Creates a pattern that matches any date value.
166    pub fn any_date() -> Self {
167        Pattern::Value(ValuePattern::Date(
168            crate::pattern::value::DatePattern::any(),
169        ))
170    }
171
172    /// Creates a pattern that matches a specific date value.
173    pub fn date(date: Date) -> Self {
174        Pattern::Value(ValuePattern::Date(
175            crate::pattern::value::DatePattern::value(date),
176        ))
177    }
178
179    /// Creates a pattern that matches dates within a range (inclusive).
180    pub fn date_range(range: std::ops::RangeInclusive<Date>) -> Self {
181        Pattern::Value(ValuePattern::Date(
182            crate::pattern::value::DatePattern::range(range),
183        ))
184    }
185
186    /// Creates a pattern that matches dates that are on or after the specified
187    /// date.
188    pub fn date_earliest(date: Date) -> Self {
189        Pattern::Value(ValuePattern::Date(
190            crate::pattern::value::DatePattern::earliest(date),
191        ))
192    }
193
194    /// Creates a pattern that matches dates that are on or before the specified
195    /// date.
196    pub fn date_latest(date: Date) -> Self {
197        Pattern::Value(ValuePattern::Date(
198            crate::pattern::value::DatePattern::latest(date),
199        ))
200    }
201
202    /// Creates a pattern that matches a date by its ISO-8601 string
203    /// representation.
204    pub fn date_iso8601(iso_string: impl Into<String>) -> Self {
205        Pattern::Value(ValuePattern::Date(
206            crate::pattern::value::DatePattern::string(iso_string),
207        ))
208    }
209
210    /// Creates a pattern that matches dates whose ISO-8601 string
211    /// representation matches the given regex pattern.
212    pub fn date_regex(regex: regex::Regex) -> Self {
213        Pattern::Value(ValuePattern::Date(
214            crate::pattern::value::DatePattern::regex(regex),
215        ))
216    }
217
218    /// Creates a pattern that matches null values.
219    pub fn null() -> Self {
220        Pattern::Value(ValuePattern::Null(crate::pattern::value::NullPattern))
221    }
222
223    /// Creates a pattern that matches any known value.
224    pub fn any_known_value() -> Self {
225        Pattern::Value(ValuePattern::KnownValue(
226            crate::pattern::value::KnownValuePattern::any(),
227        ))
228    }
229
230    /// Creates a pattern that matches a specific known value.
231    pub fn known_value(value: known_values::KnownValue) -> Self {
232        Pattern::Value(ValuePattern::KnownValue(
233            crate::pattern::value::KnownValuePattern::value(value),
234        ))
235    }
236
237    /// Creates a pattern that matches a known value by name.
238    pub fn known_value_named(name: impl Into<String>) -> Self {
239        Pattern::Value(ValuePattern::KnownValue(
240            crate::pattern::value::KnownValuePattern::named(name),
241        ))
242    }
243
244    /// Creates a pattern that matches known values using a regex on their
245    /// names.
246    pub fn known_value_regex(regex: regex::Regex) -> Self {
247        Pattern::Value(ValuePattern::KnownValue(
248            crate::pattern::value::KnownValuePattern::regex(regex),
249        ))
250    }
251
252    // Digest pattern convenience methods
253
254    /// Creates a pattern that matches any digest value.
255    pub fn any_digest() -> Self {
256        Pattern::Value(ValuePattern::Digest(
257            crate::pattern::value::DigestPattern::any(),
258        ))
259    }
260
261    /// Creates a pattern that matches a specific digest.
262    pub fn digest(digest: bc_components::Digest) -> Self {
263        Pattern::Value(ValuePattern::Digest(
264            crate::pattern::value::DigestPattern::digest(digest),
265        ))
266    }
267
268    /// Creates a pattern that matches digests with the specified prefix.
269    pub fn digest_prefix(prefix: impl AsRef<[u8]>) -> Self {
270        Pattern::Value(ValuePattern::Digest(
271            crate::pattern::value::DigestPattern::prefix(prefix),
272        ))
273    }
274
275    /// Creates a pattern that matches digests using a binary regex.
276    pub fn digest_binary_regex(regex: regex::bytes::Regex) -> Self {
277        Pattern::Value(ValuePattern::Digest(
278            crate::pattern::value::DigestPattern::binary_regex(regex),
279        ))
280    }
281
282    /// Creates a pattern that always matches any CBOR value.
283    pub fn any() -> Self {
284        Pattern::Meta(MetaPattern::Any(crate::pattern::meta::AnyPattern::new()))
285    }
286
287    /// Creates a pattern that matches if all contained patterns match.
288    pub fn and(patterns: Vec<Pattern>) -> Self {
289        Pattern::Meta(MetaPattern::And(crate::pattern::meta::AndPattern::new(
290            patterns,
291        )))
292    }
293
294    /// Creates a pattern that matches if any contained pattern matches.
295    pub fn or(patterns: Vec<Pattern>) -> Self {
296        Pattern::Meta(MetaPattern::Or(crate::pattern::meta::OrPattern::new(
297            patterns,
298        )))
299    }
300
301    /// Creates a pattern that matches if the inner pattern does not match.
302    pub fn not_matching(pattern: Pattern) -> Self {
303        Pattern::Meta(MetaPattern::Not(crate::pattern::meta::NotPattern::new(
304            pattern,
305        )))
306    }
307
308    /// Creates a pattern that captures matches with the given name.
309    pub fn capture(name: impl AsRef<str>, pattern: Pattern) -> Self {
310        Pattern::Meta(MetaPattern::Capture(
311            crate::pattern::meta::CapturePattern::new(name, pattern),
312        ))
313    }
314
315    /// Creates a search pattern that recursively searches the entire dCBOR
316    /// tree.
317    pub fn search(pattern: Pattern) -> Self {
318        Pattern::Meta(MetaPattern::Search(
319            crate::pattern::meta::SearchPattern::new(pattern),
320        ))
321    }
322
323    /// Creates a pattern that matches with repetition using a quantifier.
324    pub fn repeat(pattern: Pattern, quantifier: crate::Quantifier) -> Self {
325        Pattern::Meta(MetaPattern::Repeat(
326            crate::pattern::meta::RepeatPattern::repeat(pattern, quantifier),
327        ))
328    }
329
330    /// Creates a pattern that wraps another pattern (matches exactly once).
331    pub fn group(pattern: Pattern) -> Self {
332        Pattern::Meta(MetaPattern::Repeat(
333            crate::pattern::meta::RepeatPattern::new(pattern),
334        ))
335    }
336
337    /// Creates a sequence pattern that matches patterns in order.
338    pub fn sequence(patterns: Vec<Pattern>) -> Self {
339        Pattern::Meta(MetaPattern::Sequence(
340            crate::pattern::meta::SequencePattern::new(patterns),
341        ))
342    }
343
344    /// Creates a pattern that matches any array.
345    pub fn any_array() -> Self {
346        Pattern::Structure(crate::pattern::structure::StructurePattern::Array(
347            crate::pattern::structure::ArrayPattern::any(),
348        ))
349    }
350
351    /// Creates a pattern that matches any map.
352    pub fn any_map() -> Self {
353        Pattern::Structure(crate::pattern::structure::StructurePattern::Map(
354            crate::pattern::structure::MapPattern::any(),
355        ))
356    }
357}
358
359
360impl Pattern {
361    /// Creates a pattern that matches any tagged value.
362    pub fn any_tagged() -> Self {
363        Pattern::Structure(crate::pattern::structure::StructurePattern::Tagged(
364            crate::pattern::structure::TaggedPattern::any(),
365        ))
366    }
367
368    /// Creates a pattern that matches a tagged item with content pattern.
369    pub fn tagged(tag: impl Into<Tag>, pattern: Pattern) -> Self {
370        Pattern::Structure(crate::pattern::structure::StructurePattern::Tagged(
371            crate::pattern::structure::TaggedPattern::with_tag(tag, pattern),
372        ))
373    }
374
375    /// Creates a pattern that matches a tagged item with content pattern and
376    /// a specific tag name.
377    pub fn tagged_name(name: impl Into<String>, pattern: Pattern) -> Self {
378        Pattern::Structure(crate::pattern::structure::StructurePattern::Tagged(
379            crate::pattern::structure::TaggedPattern::with_name(name, pattern),
380        ))
381    }
382
383    /// Creates a pattern that matches a tagged item with content pattern and
384    /// a regex for the tag name.
385    pub fn tagged_regex(regex: regex::Regex, pattern: Pattern) -> Self {
386        Pattern::Structure(crate::pattern::structure::StructurePattern::Tagged(
387            crate::pattern::structure::TaggedPattern::with_regex(regex, pattern),
388        ))
389    }
390}
391
392impl Pattern {
393    /// Parse a pattern expression from a string.
394    ///
395    /// This method supports the full dCBOR pattern syntax including:
396    /// - Value patterns: bool, text, number, null, bstr, date, digest, known
397    /// - Structure patterns: Array, Map, Tagged
398    /// - Meta patterns: *, NONE, AND (&), OR (|), NOT (!)
399    /// - Capture patterns: @name(pattern)
400    /// - Grouping with parentheses
401    /// - Quantifiers: *, +, ?, {n,m}
402    ///
403    /// Examples:
404    /// - `bool` - matches any boolean value
405    /// - `true` - matches the boolean value true
406    /// - `false` - matches the boolean value false
407    /// - `"hello"` - matches the text "hello"
408    /// - `1..10` - matches numbers from 1 to 10
409    /// - `bool | text` - matches boolean or text values
410    /// - `@name(text)` - captures text with name "name"
411    pub fn parse(input: &str) -> Result<Self> {
412        let (pattern, consumed) = Self::parse_partial(input)?;
413        if consumed < input.len() {
414            // Find where we stopped to provide accurate error span
415            return Err(Error::ExtraData(consumed..input.len()));
416        }
417        Ok(pattern)
418    }
419
420    /// Parses a pattern from the beginning of a string and returns both
421    /// the parsed Pattern and the number of bytes consumed.
422    ///
423    /// Unlike `parse()`, this function succeeds even if additional
424    /// characters follow the first pattern. The returned index points to the
425    /// first unparsed character after the pattern.
426    ///
427    /// # Example
428    ///
429    /// ```rust
430    /// # use dcbor_pattern::Pattern;
431    /// let (pattern, consumed) = Pattern::parse_partial("true rest").unwrap();
432    /// assert_eq!(pattern, Pattern::bool(true));
433    /// assert_eq!(consumed, 5); // "true ".len() - includes whitespace
434    /// ```
435    pub fn parse_partial(input: &str) -> Result<(Self, usize)> {
436        use logos::Logos;
437
438        use crate::parse::{Token, meta::parse_or};
439
440        let mut lexer = Token::lexer(input);
441        let pattern = parse_or(&mut lexer)?;
442
443        // Calculate consumed bytes - much simpler than current approach!
444        let consumed = match lexer.next() {
445            Some(_) => lexer.span().start,
446            None => input.len(),
447        };
448
449        Ok((pattern, consumed))
450    }
451}
452
453impl TryFrom<&str> for Pattern {
454    type Error = Error;
455
456    fn try_from(value: &str) -> Result<Self> {
457        Self::parse(value)
458    }
459}
460
461impl Matcher for Pattern {
462    fn paths_with_captures(
463        &self,
464        haystack: &CBOR,
465    ) -> (Vec<Path>, std::collections::HashMap<String, Vec<Path>>) {
466        // Collect all capture names from this pattern
467        let mut capture_names = Vec::new();
468        self.collect_capture_names(&mut capture_names);
469
470        // If no captures, use the faster direct path matching
471        if capture_names.is_empty() {
472            return (self.paths(haystack), std::collections::HashMap::new());
473        }
474
475        // For certain pattern types, delegate directly to their
476        // paths_with_captures
477        match self {
478            Pattern::Meta(pattern) => {
479                // Meta patterns like SearchPattern handle their own capture
480                // logic
481                return pattern.paths_with_captures(haystack);
482            }
483            Pattern::Structure(pattern) => {
484                // Structure patterns like ArrayPattern handle their own capture
485                // logic, including special handling for SequencePattern
486                return pattern.paths_with_captures(haystack);
487            }
488            _ => {
489                // Use VM for other pattern types that need it
490            }
491        }
492
493        // Compile pattern to VM program for capture-aware matching
494        let mut code = Vec::new();
495        let mut literals = Vec::new();
496        let mut captures = Vec::new();
497
498        self.compile(&mut code, &mut literals, &mut captures);
499        code.push(crate::pattern::vm::Instr::Accept);
500
501        let program = crate::pattern::vm::Program {
502            code,
503            literals,
504            capture_names: captures,
505        };
506
507        // Run VM to get paths and captures
508        crate::pattern::vm::run(&program, haystack)
509    }
510
511    fn paths(&self, haystack: &CBOR) -> Vec<Path> {
512        match self {
513            Pattern::Value(pattern) => pattern.paths(haystack),
514            Pattern::Structure(pattern) => pattern.paths(haystack),
515            Pattern::Meta(pattern) => pattern.paths(haystack),
516        }
517    }
518
519    fn compile(
520        &self,
521        code: &mut Vec<Instr>,
522        literals: &mut Vec<Pattern>,
523        captures: &mut Vec<String>,
524    ) {
525        match self {
526            Pattern::Value(pattern) => {
527                pattern.compile(code, literals, captures);
528            }
529            Pattern::Structure(pattern) => {
530                pattern.compile(code, literals, captures);
531            }
532            Pattern::Meta(pattern) => {
533                pattern.compile(code, literals, captures);
534            }
535        }
536    }
537
538    /// Recursively collect all capture names from this pattern.
539    fn collect_capture_names(&self, names: &mut Vec<String>) {
540        match self {
541            Pattern::Value(_) => {
542                // Value patterns don't contain captures
543            }
544            Pattern::Structure(pattern) => {
545                pattern.collect_capture_names(names);
546            }
547            Pattern::Meta(pattern) => {
548                pattern.collect_capture_names(names);
549            }
550        }
551    }
552
553    fn is_complex(&self) -> bool {
554        match self {
555            Pattern::Value(pattern) => pattern.is_complex(),
556            Pattern::Structure(_pattern) => false, /* TODO: implement when */
557            // ready
558            Pattern::Meta(pattern) => pattern.is_complex(),
559        }
560    }
561}
562
563impl std::fmt::Display for Pattern {
564    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
565        match self {
566            Pattern::Value(pattern) => write!(f, "{}", pattern),
567            Pattern::Structure(pattern) => write!(f, "{}", pattern),
568            Pattern::Meta(pattern) => write!(f, "{}", pattern),
569        }
570    }
571}