dcbor_pattern/pattern/value/
bytestring_pattern.rs

1use dcbor::prelude::*;
2
3use crate::pattern::{Matcher, Path, Pattern, vm::Instr};
4
5/// Pattern for matching byte string values in dCBOR.
6#[derive(Debug, Clone)]
7pub enum ByteStringPattern {
8    /// Matches any byte string.
9    Any,
10    /// Matches the specific byte string.
11    Value(Vec<u8>),
12    /// Matches the binary regex for a byte string.
13    BinaryRegex(regex::bytes::Regex),
14}
15
16impl PartialEq for ByteStringPattern {
17    fn eq(&self, other: &Self) -> bool {
18        match (self, other) {
19            (ByteStringPattern::Any, ByteStringPattern::Any) => true,
20            (ByteStringPattern::Value(a), ByteStringPattern::Value(b)) => {
21                a == b
22            }
23            (ByteStringPattern::BinaryRegex(a), ByteStringPattern::BinaryRegex(b)) => {
24                a.as_str() == b.as_str()
25            }
26            _ => false,
27        }
28    }
29}
30
31impl Eq for ByteStringPattern {}
32
33impl std::hash::Hash for ByteStringPattern {
34    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
35        match self {
36            ByteStringPattern::Any => {
37                0u8.hash(state);
38            }
39            ByteStringPattern::Value(s) => {
40                1u8.hash(state);
41                s.hash(state);
42            }
43            ByteStringPattern::BinaryRegex(regex) => {
44                2u8.hash(state);
45                // Regex does not implement Hash, so we hash its pattern string.
46                regex.as_str().hash(state);
47            }
48        }
49    }
50}
51
52impl ByteStringPattern {
53    /// Creates a new `ByteStringPattern` that matches any byte string.
54    pub fn any() -> Self { ByteStringPattern::Any }
55
56    /// Creates a new `ByteStringPattern` that matches the specific byte string.
57    pub fn value(value: impl AsRef<[u8]>) -> Self {
58        ByteStringPattern::Value(value.as_ref().to_vec())
59    }
60
61    /// Creates a new `ByteStringPattern` that matches the binary regex for a
62    /// byte string.
63    pub fn regex(regex: regex::bytes::Regex) -> Self {
64        ByteStringPattern::BinaryRegex(regex)
65    }
66}
67
68impl Matcher for ByteStringPattern {
69    fn paths(&self, haystack: &CBOR) -> Vec<Path> {
70        let is_hit = haystack.as_byte_string().is_some_and(|bytes| match self {
71            ByteStringPattern::Any => true,
72            ByteStringPattern::Value(want) => bytes == want,
73            ByteStringPattern::BinaryRegex(regex) => regex.is_match(bytes),
74        });
75
76        if is_hit {
77            vec![vec![haystack.clone()]]
78        } else {
79            vec![]
80        }
81    }
82
83    fn compile(
84        &self,
85        code: &mut Vec<Instr>,
86        literals: &mut Vec<Pattern>,
87        _captures: &mut Vec<String>,
88    ) {
89        let idx = literals.len();
90        literals.push(Pattern::Value(
91            crate::pattern::ValuePattern::ByteString(self.clone()),
92        ));
93        code.push(Instr::MatchPredicate(idx));
94    }
95}
96
97impl std::fmt::Display for ByteStringPattern {
98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99        match self {
100            ByteStringPattern::Any => write!(f, "bstr"),
101            ByteStringPattern::Value(value) => {
102                write!(f, "h'{}'", hex::encode(value))
103            }
104            ByteStringPattern::BinaryRegex(regex) => {
105                write!(f, "h'/{}/'", regex.as_str())
106            }
107        }
108    }
109}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps raw bytes in a CBOR byte string.
    fn bstr(bytes: &[u8]) -> CBOR {
        CBOR::to_byte_string(bytes.to_vec())
    }

    /// Builds a regex-backed pattern from its pattern text.
    fn regex_pattern(source: &str) -> ByteStringPattern {
        ByteStringPattern::regex(regex::bytes::Regex::new(source).unwrap())
    }

    /// Checks `pattern.matches(..)` against the expected outcome for every
    /// listed haystack.
    fn assert_matches(pattern: &ByteStringPattern, cases: &[(&CBOR, bool)]) {
        for &(haystack, expected) in cases {
            assert_eq!(
                pattern.matches(haystack),
                expected,
                "pattern `{}` against {:?}",
                pattern,
                haystack
            );
        }
    }

    #[test]
    fn test_byte_string_pattern_display() {
        let rendered = [
            (ByteStringPattern::any(), "bstr"),
            (ByteStringPattern::value(vec![1, 2, 3]), r#"h'010203'"#),
            (regex_pattern(r"^\d+$"), r#"h'/^\d+$/'"#),
        ];

        for (pattern, expected) in rendered {
            assert_eq!(pattern.to_string(), expected);
        }
    }

    #[test]
    fn test_byte_string_pattern_matching() {
        let hello = bstr(b"Hello");
        let world = bstr(b"World");
        let digits = bstr(b"12345");
        let mixed = bstr(b"Hello123");
        // A text string with the same characters must never match.
        let text = "Hello".to_cbor();

        // `Any` accepts every byte string but nothing else.
        assert_matches(
            &ByteStringPattern::any(),
            &[
                (&hello, true),
                (&world, true),
                (&digits, true),
                (&mixed, true),
                (&text, false),
            ],
        );

        // An exact value accepts only identical bytes.
        assert_matches(
            &ByteStringPattern::value(b"Hello"),
            &[(&hello, true), (&world, false), (&text, false)],
        );

        // Anchored digits-only regex.
        assert_matches(
            &regex_pattern(r"^\d+$"),
            &[
                (&hello, false),
                (&world, false),
                (&digits, true),
                (&mixed, false),
                (&text, false),
            ],
        );

        // Anchored letters-only regex.
        assert_matches(
            &regex_pattern(r"^[A-Za-z]+$"),
            &[
                (&hello, true),
                (&world, true),
                (&digits, false),
                (&mixed, false),
                (&text, false),
            ],
        );
    }

    #[test]
    fn test_byte_string_pattern_paths() {
        let hello = bstr(b"Hello");
        let text = "Hello".to_cbor();
        // A hit is exactly one path containing exactly the haystack itself.
        let single_hit = vec![vec![hello.clone()]];

        let any_pattern = ByteStringPattern::any();
        assert_eq!(any_pattern.paths(&hello), single_hit);
        assert!(any_pattern.paths(&text).is_empty());

        let hello_pattern = ByteStringPattern::value(b"Hello");
        assert_eq!(hello_pattern.paths(&hello), single_hit);
        assert!(hello_pattern.paths(&text).is_empty());
    }

    #[test]
    fn test_byte_string_pattern_equality() {
        let any_a = ByteStringPattern::any();
        let any_b = ByteStringPattern::any();
        let bytes_123_a = ByteStringPattern::value(vec![1, 2, 3]);
        let bytes_123_b = ByteStringPattern::value(vec![1, 2, 3]);
        let bytes_456 = ByteStringPattern::value(vec![4, 5, 6]);
        let digits_a = regex_pattern(r"\d+");
        let digits_b = regex_pattern(r"\d+");
        let letters = regex_pattern(r"[a-z]+");

        // Same variant, same payload.
        assert_eq!(any_a, any_b);
        assert_eq!(bytes_123_a, bytes_123_b);
        assert_eq!(digits_a, digits_b);

        // Different variant or different payload.
        assert_ne!(any_a, bytes_123_a);
        assert_ne!(bytes_123_a, bytes_456);
        assert_ne!(digits_a, letters);
        assert_ne!(bytes_123_a, digits_a);
    }

    #[test]
    fn test_byte_string_pattern_regex_complex() {
        // Byte strings whose content happens to look like (or unlike) an
        // email address.
        let email = bstr(b"test@example.com");
        let not_email = bstr(b"not_an_email");

        // Simple email-shaped regex.
        let email_pattern = regex_pattern(r"^[^@]+@[^@]+\.[^@]+$");

        assert_matches(&email_pattern, &[(&email, true), (&not_email, false)]);
    }

    #[test]
    fn test_byte_string_pattern_binary_data() {
        // Genuinely binary content (not valid text).
        let raw: [u8; 7] = [0x00, 0x01, 0x02, 0x03, 0xFF, 0xFE, 0xFD];
        let binary = bstr(&raw);

        assert!(ByteStringPattern::any().matches(&binary));
        assert!(ByteStringPattern::value(raw.to_vec()).matches(&binary));

        // A strict prefix of the data is not the same value.
        let prefix_only = ByteStringPattern::value(vec![0x00, 0x01, 0x02]);
        assert!(!prefix_only.matches(&binary));

        // Regex anchored on the first byte: 0x00 is present, 0x01 is not
        // first.
        assert!(regex_pattern(r"^\x00").matches(&binary));
        assert!(!regex_pattern(r"^\x01").matches(&binary));
    }
}