dcbor_pattern/pattern/value/
bytestring_pattern.rs1use dcbor::prelude::*;
2
3use crate::pattern::{Matcher, Path, Pattern, vm::Instr};
4
5#[derive(Debug, Clone)]
7pub enum ByteStringPattern {
8 Any,
10 Value(Vec<u8>),
12 BinaryRegex(regex::bytes::Regex),
14}
15
16impl PartialEq for ByteStringPattern {
17 fn eq(&self, other: &Self) -> bool {
18 match (self, other) {
19 (ByteStringPattern::Any, ByteStringPattern::Any) => true,
20 (ByteStringPattern::Value(a), ByteStringPattern::Value(b)) => {
21 a == b
22 }
23 (
24 ByteStringPattern::BinaryRegex(a),
25 ByteStringPattern::BinaryRegex(b),
26 ) => a.as_str() == b.as_str(),
27 _ => false,
28 }
29 }
30}
31
32impl Eq for ByteStringPattern {}
33
34impl std::hash::Hash for ByteStringPattern {
35 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
36 match self {
37 ByteStringPattern::Any => {
38 0u8.hash(state);
39 }
40 ByteStringPattern::Value(s) => {
41 1u8.hash(state);
42 s.hash(state);
43 }
44 ByteStringPattern::BinaryRegex(regex) => {
45 2u8.hash(state);
46 regex.as_str().hash(state);
48 }
49 }
50 }
51}
52
53impl ByteStringPattern {
54 pub fn any() -> Self { ByteStringPattern::Any }
56
57 pub fn value(value: impl AsRef<[u8]>) -> Self {
59 ByteStringPattern::Value(value.as_ref().to_vec())
60 }
61
62 pub fn regex(regex: regex::bytes::Regex) -> Self {
65 ByteStringPattern::BinaryRegex(regex)
66 }
67}
68
69impl Matcher for ByteStringPattern {
70 fn paths(&self, haystack: &CBOR) -> Vec<Path> {
71 let is_hit =
72 haystack.as_byte_string().is_some_and(|bytes| match self {
73 ByteStringPattern::Any => true,
74 ByteStringPattern::Value(want) => bytes == want,
75 ByteStringPattern::BinaryRegex(regex) => regex.is_match(bytes),
76 });
77
78 if is_hit {
79 vec![vec![haystack.clone()]]
80 } else {
81 vec![]
82 }
83 }
84
85 fn compile(
86 &self,
87 code: &mut Vec<Instr>,
88 literals: &mut Vec<Pattern>,
89 _captures: &mut Vec<String>,
90 ) {
91 let idx = literals.len();
92 literals.push(Pattern::Value(
93 crate::pattern::ValuePattern::ByteString(self.clone()),
94 ));
95 code.push(Instr::MatchPredicate(idx));
96 }
97}
98
99impl std::fmt::Display for ByteStringPattern {
100 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
101 match self {
102 ByteStringPattern::Any => write!(f, "bstr"),
103 ByteStringPattern::Value(value) => {
104 write!(f, "h'{}'", hex::encode(value))
105 }
106 ByteStringPattern::BinaryRegex(regex) => {
107 write!(f, "h'/{}/'", regex.as_str())
108 }
109 }
110 }
111}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// Each variant renders in its expected textual form.
    #[test]
    fn test_byte_string_pattern_display() {
        assert_eq!(ByteStringPattern::any().to_string(), "bstr");
        assert_eq!(
            ByteStringPattern::value(vec![1, 2, 3]).to_string(),
            r#"h'010203'"#
        );
        assert_eq!(
            ByteStringPattern::regex(
                regex::bytes::Regex::new(r"^\d+$").unwrap()
            )
            .to_string(),
            r#"h'/^\d+$/'"#
        );
    }

    /// `Any`, exact-value and regex patterns accept and reject the right
    /// inputs, and never match a text string.
    #[test]
    fn test_byte_string_pattern_matching() {
        let hello_bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f]; // "Hello"
        let hello_cbor = CBOR::to_byte_string(hello_bytes.clone());
        let world_bytes = vec![0x57, 0x6f, 0x72, 0x6c, 0x64]; // "World"
        let world_cbor = CBOR::to_byte_string(world_bytes);
        let digits_bytes = vec![0x31, 0x32, 0x33, 0x34, 0x35]; // "12345"
        let digits_cbor = CBOR::to_byte_string(digits_bytes);
        // "Hello123"
        let mixed_bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x31, 0x32, 0x33];
        let mixed_cbor = CBOR::to_byte_string(mixed_bytes);
        let text_cbor = "Hello".to_cbor();

        // `Any` accepts every byte string but not text.
        let any_pattern = ByteStringPattern::any();
        assert!(any_pattern.matches(&hello_cbor));
        assert!(any_pattern.matches(&world_cbor));
        assert!(any_pattern.matches(&digits_cbor));
        assert!(any_pattern.matches(&mixed_cbor));
        assert!(!any_pattern.matches(&text_cbor));

        // Exact value.
        let hello_pattern = ByteStringPattern::value(hello_bytes);
        assert!(hello_pattern.matches(&hello_cbor));
        assert!(!hello_pattern.matches(&world_cbor));
        assert!(!hello_pattern.matches(&text_cbor));

        // Regex: digits only.
        let digits_regex = regex::bytes::Regex::new(r"^\d+$").unwrap();
        let digits_pattern = ByteStringPattern::regex(digits_regex);
        assert!(!digits_pattern.matches(&hello_cbor));
        assert!(!digits_pattern.matches(&world_cbor));
        assert!(digits_pattern.matches(&digits_cbor));
        assert!(!digits_pattern.matches(&mixed_cbor));
        assert!(!digits_pattern.matches(&text_cbor));

        // Regex: ASCII letters only.
        let alpha_regex = regex::bytes::Regex::new(r"^[A-Za-z]+$").unwrap();
        let alpha_pattern = ByteStringPattern::regex(alpha_regex);
        assert!(alpha_pattern.matches(&hello_cbor));
        assert!(alpha_pattern.matches(&world_cbor));
        assert!(!alpha_pattern.matches(&digits_cbor));
        assert!(!alpha_pattern.matches(&mixed_cbor));
        assert!(!alpha_pattern.matches(&text_cbor));
    }

    /// A match yields exactly one path containing only the matched value; a
    /// miss yields no paths.
    #[test]
    fn test_byte_string_pattern_paths() {
        let hello_bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f]; // "Hello"
        let hello_cbor = CBOR::to_byte_string(hello_bytes.clone());
        let text_cbor = "Hello".to_cbor();

        let any_pattern = ByteStringPattern::any();
        let hello_paths = any_pattern.paths(&hello_cbor);
        assert_eq!(hello_paths.len(), 1);
        assert_eq!(hello_paths[0].len(), 1);
        assert_eq!(hello_paths[0][0], hello_cbor);

        let text_paths = any_pattern.paths(&text_cbor);
        assert_eq!(text_paths.len(), 0);

        let hello_pattern = ByteStringPattern::value(hello_bytes);
        let paths = hello_pattern.paths(&hello_cbor);
        assert_eq!(paths.len(), 1);
        assert_eq!(paths[0].len(), 1);
        assert_eq!(paths[0][0], hello_cbor);

        let no_match_paths = hello_pattern.paths(&text_cbor);
        assert_eq!(no_match_paths.len(), 0);
    }

    /// Equality holds within a variant for equal payloads and never across
    /// variants.
    #[test]
    fn test_byte_string_pattern_equality() {
        let any1 = ByteStringPattern::any();
        let any2 = ByteStringPattern::any();
        let value1 = ByteStringPattern::value(vec![1, 2, 3]);
        let value2 = ByteStringPattern::value(vec![1, 2, 3]);
        let value3 = ByteStringPattern::value(vec![4, 5, 6]);
        let regex1 =
            ByteStringPattern::regex(regex::bytes::Regex::new(r"\d+").unwrap());
        let regex2 =
            ByteStringPattern::regex(regex::bytes::Regex::new(r"\d+").unwrap());
        let regex3 = ByteStringPattern::regex(
            regex::bytes::Regex::new(r"[a-z]+").unwrap(),
        );

        // Equal pairs.
        assert_eq!(any1, any2);
        assert_eq!(value1, value2);
        assert_eq!(regex1, regex2);

        // Unequal pairs.
        assert_ne!(any1, value1);
        assert_ne!(value1, value3);
        assert_ne!(regex1, regex3);
        assert_ne!(value1, regex1);
    }

    /// A more involved regex (a rough e-mail shape) applied to byte strings.
    #[test]
    fn test_byte_string_pattern_regex_complex() {
        let email_bytes = b"test@example.com";
        let email_cbor = CBOR::to_byte_string(email_bytes);
        let not_email_bytes = b"not_an_email";
        let not_email_cbor = CBOR::to_byte_string(not_email_bytes);

        let email_regex =
            regex::bytes::Regex::new(r"^[^@]+@[^@]+\.[^@]+$").unwrap();
        let email_pattern = ByteStringPattern::regex(email_regex);

        assert!(email_pattern.matches(&email_cbor));
        assert!(!email_pattern.matches(&not_email_cbor));
    }

    /// Patterns work on arbitrary binary content, not just ASCII.
    #[test]
    fn test_byte_string_pattern_binary_data() {
        let binary_data = vec![0x00, 0x01, 0x02, 0x03, 0xFF, 0xFE, 0xFD];
        let binary_cbor = CBOR::to_byte_string(binary_data.clone());

        let any_pattern = ByteStringPattern::any();
        assert!(any_pattern.matches(&binary_cbor));

        let exact_pattern = ByteStringPattern::value(binary_data);
        assert!(exact_pattern.matches(&binary_cbor));

        // A strict prefix of the data is not an exact match.
        let different_pattern =
            ByteStringPattern::value(vec![0x00, 0x01, 0x02]);
        assert!(!different_pattern.matches(&binary_cbor));

        let starts_with_zero_regex =
            regex::bytes::Regex::new(r"^\x00").unwrap();
        let starts_with_zero_pattern =
            ByteStringPattern::regex(starts_with_zero_regex);
        assert!(starts_with_zero_pattern.matches(&binary_cbor));

        let starts_with_one_regex = regex::bytes::Regex::new(r"^\x01").unwrap();
        let starts_with_one_pattern =
            ByteStringPattern::regex(starts_with_one_regex);
        assert!(!starts_with_one_pattern.matches(&binary_cbor));
    }
}