// dcbor_pattern/pattern/value/bytestring_pattern.rs
use dcbor::prelude::*;
2
3use crate::pattern::{Matcher, Path, Pattern, vm::Instr};
4
5#[derive(Debug, Clone)]
7pub enum ByteStringPattern {
8 Any,
10 Value(Vec<u8>),
12 BinaryRegex(regex::bytes::Regex),
14}
15
16impl PartialEq for ByteStringPattern {
17 fn eq(&self, other: &Self) -> bool {
18 match (self, other) {
19 (ByteStringPattern::Any, ByteStringPattern::Any) => true,
20 (ByteStringPattern::Value(a), ByteStringPattern::Value(b)) => {
21 a == b
22 }
23 (ByteStringPattern::BinaryRegex(a), ByteStringPattern::BinaryRegex(b)) => {
24 a.as_str() == b.as_str()
25 }
26 _ => false,
27 }
28 }
29}
30
31impl Eq for ByteStringPattern {}
32
33impl std::hash::Hash for ByteStringPattern {
34 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
35 match self {
36 ByteStringPattern::Any => {
37 0u8.hash(state);
38 }
39 ByteStringPattern::Value(s) => {
40 1u8.hash(state);
41 s.hash(state);
42 }
43 ByteStringPattern::BinaryRegex(regex) => {
44 2u8.hash(state);
45 regex.as_str().hash(state);
47 }
48 }
49 }
50}
51
52impl ByteStringPattern {
53 pub fn any() -> Self { ByteStringPattern::Any }
55
56 pub fn value(value: impl AsRef<[u8]>) -> Self {
58 ByteStringPattern::Value(value.as_ref().to_vec())
59 }
60
61 pub fn regex(regex: regex::bytes::Regex) -> Self {
64 ByteStringPattern::BinaryRegex(regex)
65 }
66}
67
68impl Matcher for ByteStringPattern {
69 fn paths(&self, haystack: &CBOR) -> Vec<Path> {
70 let is_hit = haystack.as_byte_string().is_some_and(|bytes| match self {
71 ByteStringPattern::Any => true,
72 ByteStringPattern::Value(want) => bytes == want,
73 ByteStringPattern::BinaryRegex(regex) => regex.is_match(bytes),
74 });
75
76 if is_hit {
77 vec![vec![haystack.clone()]]
78 } else {
79 vec![]
80 }
81 }
82
83 fn compile(
84 &self,
85 code: &mut Vec<Instr>,
86 literals: &mut Vec<Pattern>,
87 _captures: &mut Vec<String>,
88 ) {
89 let idx = literals.len();
90 literals.push(Pattern::Value(
91 crate::pattern::ValuePattern::ByteString(self.clone()),
92 ));
93 code.push(Instr::MatchPredicate(idx));
94 }
95}
96
97impl std::fmt::Display for ByteStringPattern {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 match self {
100 ByteStringPattern::Any => write!(f, "bstr"),
101 ByteStringPattern::Value(value) => {
102 write!(f, "h'{}'", hex::encode(value))
103 }
104 ByteStringPattern::BinaryRegex(regex) => {
105 write!(f, "h'/{}/'", regex.as_str())
106 }
107 }
108 }
109}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display` renders each variant in its textual pattern form.
    #[test]
    fn test_byte_string_pattern_display() {
        assert_eq!(ByteStringPattern::any().to_string(), "bstr");
        assert_eq!(
            ByteStringPattern::value(vec![1, 2, 3]).to_string(),
            r#"h'010203'"#
        );
        assert_eq!(
            ByteStringPattern::regex(
                regex::bytes::Regex::new(r"^\d+$").unwrap()
            )
            .to_string(),
            r#"h'/^\d+$/'"#
        );
    }

    /// Each variant matches the expected byte strings and never matches a
    /// text string.
    #[test]
    fn test_byte_string_pattern_matching() {
        // "Hello"
        let hello_bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f];
        let hello_cbor = CBOR::to_byte_string(hello_bytes.clone());
        // "World"
        let world_bytes = vec![0x57, 0x6f, 0x72, 0x6c, 0x64];
        let world_cbor = CBOR::to_byte_string(world_bytes.clone());
        // "12345"
        let digits_bytes = vec![0x31, 0x32, 0x33, 0x34, 0x35];
        let digits_cbor = CBOR::to_byte_string(digits_bytes.clone());
        // "Hello123"
        let mixed_bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x31, 0x32, 0x33];
        let mixed_cbor = CBOR::to_byte_string(mixed_bytes.clone());
        let text_cbor = "Hello".to_cbor();

        // `Any` accepts every byte string but not text.
        let any_pattern = ByteStringPattern::any();
        assert!(any_pattern.matches(&hello_cbor));
        assert!(any_pattern.matches(&world_cbor));
        assert!(any_pattern.matches(&digits_cbor));
        assert!(any_pattern.matches(&mixed_cbor));
        assert!(!any_pattern.matches(&text_cbor));

        // `Value` accepts only the exact bytes.
        let hello_pattern = ByteStringPattern::value(hello_bytes.clone());
        assert!(hello_pattern.matches(&hello_cbor));
        assert!(!hello_pattern.matches(&world_cbor));
        assert!(!hello_pattern.matches(&text_cbor));

        // Regex accepting only ASCII digits.
        let digits_regex = regex::bytes::Regex::new(r"^\d+$").unwrap();
        let digits_pattern = ByteStringPattern::regex(digits_regex);
        assert!(!digits_pattern.matches(&hello_cbor));
        assert!(!digits_pattern.matches(&world_cbor));
        assert!(digits_pattern.matches(&digits_cbor));
        assert!(!digits_pattern.matches(&mixed_cbor));
        assert!(!digits_pattern.matches(&text_cbor));

        // Regex accepting only ASCII letters.
        let alpha_regex = regex::bytes::Regex::new(r"^[A-Za-z]+$").unwrap();
        let alpha_pattern = ByteStringPattern::regex(alpha_regex);
        assert!(alpha_pattern.matches(&hello_cbor));
        assert!(alpha_pattern.matches(&world_cbor));
        assert!(!alpha_pattern.matches(&digits_cbor));
        assert!(!alpha_pattern.matches(&mixed_cbor));
        assert!(!alpha_pattern.matches(&text_cbor));
    }

    /// A match yields exactly one path containing only the matched value; a
    /// non-match yields no paths.
    #[test]
    fn test_byte_string_pattern_paths() {
        let hello_bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f];
        let hello_cbor = CBOR::to_byte_string(hello_bytes.clone());
        let text_cbor = "Hello".to_cbor();

        let any_pattern = ByteStringPattern::any();
        let hello_paths = any_pattern.paths(&hello_cbor);
        assert_eq!(hello_paths.len(), 1);
        assert_eq!(hello_paths[0].len(), 1);
        assert_eq!(hello_paths[0][0], hello_cbor);

        let text_paths = any_pattern.paths(&text_cbor);
        assert_eq!(text_paths.len(), 0);

        let hello_pattern = ByteStringPattern::value(hello_bytes.clone());
        let paths = hello_pattern.paths(&hello_cbor);
        assert_eq!(paths.len(), 1);
        assert_eq!(paths[0].len(), 1);
        assert_eq!(paths[0][0], hello_cbor);

        let no_match_paths = hello_pattern.paths(&text_cbor);
        assert_eq!(no_match_paths.len(), 0);
    }

    /// Patterns compare equal only when variant and payload agree.
    #[test]
    fn test_byte_string_pattern_equality() {
        let any1 = ByteStringPattern::any();
        let any2 = ByteStringPattern::any();
        let value1 = ByteStringPattern::value(vec![1, 2, 3]);
        let value2 = ByteStringPattern::value(vec![1, 2, 3]);
        let value3 = ByteStringPattern::value(vec![4, 5, 6]);
        let regex1 =
            ByteStringPattern::regex(regex::bytes::Regex::new(r"\d+").unwrap());
        let regex2 =
            ByteStringPattern::regex(regex::bytes::Regex::new(r"\d+").unwrap());
        let regex3 = ByteStringPattern::regex(
            regex::bytes::Regex::new(r"[a-z]+").unwrap(),
        );

        // Same variant, same payload.
        assert_eq!(any1, any2);
        assert_eq!(value1, value2);
        assert_eq!(regex1, regex2);

        // Different variant or different payload.
        assert_ne!(any1, value1);
        assert_ne!(value1, value3);
        assert_ne!(regex1, regex3);
        assert_ne!(value1, regex1);
    }

    /// A more involved regex (rough e-mail shape) applied to byte strings.
    #[test]
    fn test_byte_string_pattern_regex_complex() {
        let email_bytes = b"test@example.com";
        let email_cbor = CBOR::to_byte_string(email_bytes);
        let not_email_bytes = b"not_an_email";
        let not_email_cbor = CBOR::to_byte_string(not_email_bytes);

        let email_regex =
            regex::bytes::Regex::new(r"^[^@]+@[^@]+\.[^@]+$").unwrap();
        let email_pattern = ByteStringPattern::regex(email_regex);

        assert!(email_pattern.matches(&email_cbor));
        assert!(!email_pattern.matches(&not_email_cbor));
    }

    /// Patterns work on byte strings that are not valid text.
    #[test]
    fn test_byte_string_pattern_binary_data() {
        let binary_data = vec![0x00, 0x01, 0x02, 0x03, 0xFF, 0xFE, 0xFD];
        let binary_cbor = CBOR::to_byte_string(binary_data.clone());

        let any_pattern = ByteStringPattern::any();
        assert!(any_pattern.matches(&binary_cbor));

        let exact_pattern = ByteStringPattern::value(binary_data.clone());
        assert!(exact_pattern.matches(&binary_cbor));

        // A strict prefix of the data is not an exact match.
        let different_pattern =
            ByteStringPattern::value(vec![0x00, 0x01, 0x02]);
        assert!(!different_pattern.matches(&binary_cbor));

        let starts_with_zero_regex =
            regex::bytes::Regex::new(r"^\x00").unwrap();
        let starts_with_zero_pattern =
            ByteStringPattern::regex(starts_with_zero_regex);
        assert!(starts_with_zero_pattern.matches(&binary_cbor));

        let starts_with_one_regex = regex::bytes::Regex::new(r"^\x01").unwrap();
        let starts_with_one_pattern =
            ByteStringPattern::regex(starts_with_one_regex);
        assert!(!starts_with_one_pattern.matches(&binary_cbor));
    }
}