// dcbor_pattern/pattern/value/bytestring_pattern.rs

1use dcbor::prelude::*;
2
3use crate::pattern::{Matcher, Path, Pattern, vm::Instr};
4
5/// Pattern for matching byte string values in dCBOR.
6#[derive(Debug, Clone)]
7pub enum ByteStringPattern {
8    /// Matches any byte string.
9    Any,
10    /// Matches the specific byte string.
11    Value(Vec<u8>),
12    /// Matches the binary regex for a byte string.
13    BinaryRegex(regex::bytes::Regex),
14}
15
16impl PartialEq for ByteStringPattern {
17    fn eq(&self, other: &Self) -> bool {
18        match (self, other) {
19            (ByteStringPattern::Any, ByteStringPattern::Any) => true,
20            (ByteStringPattern::Value(a), ByteStringPattern::Value(b)) => {
21                a == b
22            }
23            (
24                ByteStringPattern::BinaryRegex(a),
25                ByteStringPattern::BinaryRegex(b),
26            ) => a.as_str() == b.as_str(),
27            _ => false,
28        }
29    }
30}
31
32impl Eq for ByteStringPattern {}
33
34impl std::hash::Hash for ByteStringPattern {
35    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
36        match self {
37            ByteStringPattern::Any => {
38                0u8.hash(state);
39            }
40            ByteStringPattern::Value(s) => {
41                1u8.hash(state);
42                s.hash(state);
43            }
44            ByteStringPattern::BinaryRegex(regex) => {
45                2u8.hash(state);
46                // Regex does not implement Hash, so we hash its pattern string.
47                regex.as_str().hash(state);
48            }
49        }
50    }
51}
52
53impl ByteStringPattern {
54    /// Creates a new `ByteStringPattern` that matches any byte string.
55    pub fn any() -> Self { ByteStringPattern::Any }
56
57    /// Creates a new `ByteStringPattern` that matches the specific byte string.
58    pub fn value(value: impl AsRef<[u8]>) -> Self {
59        ByteStringPattern::Value(value.as_ref().to_vec())
60    }
61
62    /// Creates a new `ByteStringPattern` that matches the binary regex for a
63    /// byte string.
64    pub fn regex(regex: regex::bytes::Regex) -> Self {
65        ByteStringPattern::BinaryRegex(regex)
66    }
67}
68
69impl Matcher for ByteStringPattern {
70    fn paths(&self, haystack: &CBOR) -> Vec<Path> {
71        let is_hit =
72            haystack.as_byte_string().is_some_and(|bytes| match self {
73                ByteStringPattern::Any => true,
74                ByteStringPattern::Value(want) => bytes == want,
75                ByteStringPattern::BinaryRegex(regex) => regex.is_match(bytes),
76            });
77
78        if is_hit {
79            vec![vec![haystack.clone()]]
80        } else {
81            vec![]
82        }
83    }
84
85    fn compile(
86        &self,
87        code: &mut Vec<Instr>,
88        literals: &mut Vec<Pattern>,
89        _captures: &mut Vec<String>,
90    ) {
91        let idx = literals.len();
92        literals.push(Pattern::Value(
93            crate::pattern::ValuePattern::ByteString(self.clone()),
94        ));
95        code.push(Instr::MatchPredicate(idx));
96    }
97}
98
99impl std::fmt::Display for ByteStringPattern {
100    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
101        match self {
102            ByteStringPattern::Any => write!(f, "bstr"),
103            ByteStringPattern::Value(value) => {
104                write!(f, "h'{}'", hex::encode(value))
105            }
106            ByteStringPattern::BinaryRegex(regex) => {
107                write!(f, "h'/{}/'", regex.as_str())
108            }
109        }
110    }
111}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a regex-backed pattern from `source`; panics if the regex is
    /// invalid.
    fn regex_pattern(source: &str) -> ByteStringPattern {
        let compiled = regex::bytes::Regex::new(source).unwrap();
        ByteStringPattern::regex(compiled)
    }

    /// Checks the textual rendering of each pattern variant.
    #[test]
    fn test_byte_string_pattern_display() {
        let any = ByteStringPattern::any();
        assert_eq!(any.to_string(), "bstr");

        let exact = ByteStringPattern::value([1u8, 2, 3]);
        assert_eq!(exact.to_string(), r#"h'010203'"#);

        let digits = regex_pattern(r"^\d+$");
        assert_eq!(digits.to_string(), r#"h'/^\d+$/'"#);
    }

    /// Checks which CBOR values each kind of pattern accepts.
    #[test]
    fn test_byte_string_pattern_matching() {
        let hello = CBOR::to_byte_string(b"Hello");
        let world = CBOR::to_byte_string(b"World");
        let digits = CBOR::to_byte_string(b"12345");
        let mixed = CBOR::to_byte_string(b"Hello123");
        // A text string with the same characters is not a byte string.
        let text = "Hello".to_cbor();

        // `Any` accepts every byte string and rejects the text string.
        let any = ByteStringPattern::any();
        for accepted in [&hello, &world, &digits, &mixed] {
            assert!(any.matches(accepted));
        }
        assert!(!any.matches(&text));

        // An exact-value pattern accepts only identical bytes.
        let exact_hello = ByteStringPattern::value(b"Hello");
        assert!(exact_hello.matches(&hello));
        for rejected in [&world, &text] {
            assert!(!exact_hello.matches(rejected));
        }

        // A fully anchored digits-only regex.
        let only_digits = regex_pattern(r"^\d+$");
        assert!(only_digits.matches(&digits));
        for rejected in [&hello, &world, &mixed, &text] {
            assert!(!only_digits.matches(rejected));
        }

        // A fully anchored letters-only regex.
        let only_letters = regex_pattern(r"^[A-Za-z]+$");
        for accepted in [&hello, &world] {
            assert!(only_letters.matches(accepted));
        }
        for rejected in [&digits, &mixed, &text] {
            assert!(!only_letters.matches(rejected));
        }
    }

    /// Checks the shape of the paths reported for hits and misses.
    #[test]
    fn test_byte_string_pattern_paths() {
        let hello = CBOR::to_byte_string(b"Hello");
        let text = "Hello".to_cbor();
        // A hit is reported as exactly one path with the value as its only
        // element.
        let single_hit = vec![vec![hello.clone()]];

        let any = ByteStringPattern::any();
        assert_eq!(any.paths(&hello), single_hit);
        assert!(any.paths(&text).is_empty());

        let exact_hello = ByteStringPattern::value(b"Hello");
        assert_eq!(exact_hello.paths(&hello), single_hit);
        assert!(exact_hello.paths(&text).is_empty());
    }

    /// Checks `PartialEq` across and within variants.
    #[test]
    fn test_byte_string_pattern_equality() {
        let any = ByteStringPattern::any();
        let bytes_123 = ByteStringPattern::value([1u8, 2, 3]);
        let bytes_456 = ByteStringPattern::value([4u8, 5, 6]);
        let digits = regex_pattern(r"\d+");
        let lowercase = regex_pattern(r"[a-z]+");

        // Independently built patterns with the same content are equal.
        assert_eq!(any, ByteStringPattern::any());
        assert_eq!(bytes_123, ByteStringPattern::value([1u8, 2, 3]));
        assert_eq!(digits, regex_pattern(r"\d+"));

        // Differing variants or payloads are unequal.
        assert_ne!(any, bytes_123);
        assert_ne!(bytes_123, bytes_456);
        assert_ne!(digits, lowercase);
        assert_ne!(bytes_123, digits);
    }

    /// Checks a more involved regex against text-like byte strings.
    #[test]
    fn test_byte_string_pattern_regex_complex() {
        // Simple email-shaped regex applied to raw bytes.
        let email_like = regex_pattern(r"^[^@]+@[^@]+\.[^@]+$");

        let email = CBOR::to_byte_string(b"test@example.com");
        let not_email = CBOR::to_byte_string(b"not_an_email");

        assert!(email_like.matches(&email));
        assert!(!email_like.matches(&not_email));
    }

    /// Checks matching against bytes that are not text.
    #[test]
    fn test_byte_string_pattern_binary_data() {
        let raw: Vec<u8> = vec![0x00, 0x01, 0x02, 0x03, 0xFF, 0xFE, 0xFD];
        let binary = CBOR::to_byte_string(raw.clone());

        assert!(ByteStringPattern::any().matches(&binary));
        assert!(ByteStringPattern::value(&raw).matches(&binary));

        // A strict prefix of the data is not an exact match.
        let prefix_only = ByteStringPattern::value([0x00u8, 0x01, 0x02]);
        assert!(!prefix_only.matches(&binary));

        // A regex anchored on a leading 0x00 byte matches...
        assert!(regex_pattern(r"^\x00").matches(&binary));
        // ...while one anchored on a leading 0x01 byte does not.
        assert!(!regex_pattern(r"^\x01").matches(&binary));
    }
}