1#![deny(warnings)]
51#![recursion_limit = "1024"]
52#![deny(
53 clippy::all,
54 clippy::unwrap_used,
55 clippy::unnecessary_unwrap,
56 clippy::pedantic
57)]
58#![allow(clippy::must_use_candidate)]
59
60use serde::{Deserialize, Serialize};
61use simd_json::prelude::{MutableObject, *};
62use std::fmt;
63
/// Errors reported while compiling a key/value pattern.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// A `%{key}` token is not followed by a `%{val}` token; carries the byte
    /// offset in the pattern directly after that `%{key}` token.
    InvalidPattern(usize),
    /// The carried separator is contained in another separator, so the two
    /// cannot be told apart while splitting.
    DoubleSeperator(String),
    /// A backslash was followed by the carried, unsupported character.
    InvalidEscape(char),
    /// A backslash was the last character of a separator.
    UnterminatedEscape,
}

impl fmt::Display for Error {
    /// Writes the human readable message for each error variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::UnterminatedEscape => write!(
                f,
                "Unterminated escape at the end of line or of a delimiter %{{ can't be escaped"
            ),
            Self::InvalidEscape(s) => write!(f, "Invalid escape sequence \\'{s}' is not valid."),
            Self::DoubleSeperator(s) => write!(
                f,
                "The seperator '{s}' is used for both key value seperation as well as pair seperation."
            ),
            Self::InvalidPattern(p) => write!(f, "invalid pattern at character {p}"),
        }
    }
}

// The default trait methods are sufficient: no variant wraps another error.
impl std::error::Error for Error {}
91
92#[derive(PartialEq, Debug, Clone, Serialize, Deserialize, Eq)]
93pub struct Pattern {
94 field_seperators: Vec<String>,
95 key_seperators: Vec<String>,
96}
97
98impl std::default::Default for Pattern {
99 fn default() -> Self {
100 Self {
101 field_seperators: vec![" ".to_string()],
102 key_seperators: vec![":".to_string()],
103 }
104 }
105}
106
107fn handle_escapes(s: &str) -> Result<String, Error> {
108 let mut res = String::with_capacity(s.len());
109 let mut cs = s.chars();
110 while let Some(c) = cs.next() {
111 match c {
112 '\\' => {
113 if let Some(c1) = cs.next() {
114 match c1 {
115 '\\' => res.push(c1),
116 'n' => res.push('\n'),
117 't' => res.push('\t'),
118 'r' => res.push('\r'),
119 other => return Err(Error::InvalidEscape(other)),
120 }
121 } else {
122 return Err(Error::UnterminatedEscape);
123 }
124 }
125 c => res.push(c),
126 }
127 }
128 Ok(res)
129}
130
131impl Pattern {
132 pub fn compile(pattern: &str) -> Result<Self, Error> {
136 let mut field_seperators = Vec::new();
137 let mut key_seperators = Vec::new();
138 let mut i = 0;
139 loop {
140 if pattern[i..].starts_with("%{key}") {
141 i += 6;
142 if let Some(i1) = pattern[i..].find("%{val}") {
143 if i1 != 0 {
144 key_seperators.push(handle_escapes(&pattern[i..i + i1])?);
145 }
146 i += i1 + 6;
147 } else {
148 return Err(Error::InvalidPattern(i));
149 }
150 } else if let Some(i1) = pattern[i..].find("%{key}") {
151 if i1 != 0 {
152 field_seperators.push(handle_escapes(&pattern[i..i + i1])?);
153 }
154 i += i1;
155 } else if pattern[i..].is_empty() {
156 break;
157 } else {
158 field_seperators.push(handle_escapes(&pattern[i..])?);
159 break;
160 }
161 }
162 if field_seperators.is_empty() {
163 field_seperators.push(" ".to_string());
164 }
165 if key_seperators.is_empty() {
166 key_seperators.push(":".to_string());
167 }
168 field_seperators.sort();
169 key_seperators.sort();
170 field_seperators.dedup();
171 key_seperators.dedup();
172
173 for fs in &field_seperators {
174 if key_seperators.iter().any(|ks| ks.contains(fs)) {
175 return Err(Error::DoubleSeperator(fs.to_string()));
176 }
177
178 if field_seperators
179 .iter()
180 .any(|fs2| fs2 != fs && fs2.contains(fs))
181 {
182 return Err(Error::DoubleSeperator(fs.to_string()));
183 }
184 }
185
186 for ks in &key_seperators {
187 if field_seperators.iter().any(|fs| fs.contains(ks)) {
188 return Err(Error::DoubleSeperator(ks.to_string()));
189 }
190
191 if key_seperators
192 .iter()
193 .any(|ks2| ks2 != ks && ks2.contains(ks))
194 {
195 return Err(Error::DoubleSeperator(ks.to_string()));
196 }
197 }
198
199 Ok(Self {
200 field_seperators,
201 key_seperators,
202 })
203 }
204 pub fn run<'input, V>(&self, input: &'input str) -> Option<V>
210 where
211 V: ValueBuilder<'input> + MutableObject + 'input,
212 <V as MutableObject>::Key: std::hash::Hash + Eq + From<&'input str>,
213 <V as MutableObject>::Target: std::convert::From<&'input str>,
214 {
215 let mut r = V::object();
216 let mut empty = true;
217 for field in multi_split(input, &self.field_seperators) {
218 let kv: Vec<&str> = multi_split(field, &self.key_seperators);
219 if kv.len() == 2 {
220 empty = false;
221 r.insert(kv[0], kv[1]).ok()?;
222 }
223 }
224 if empty { None } else { Some(r) }
225 }
226}
227
/// Splits `input` on every separator in `seperators`.
///
/// The separators are applied one after another: each piece produced by the
/// previous separator is split again by the next one. Empty pieces are kept,
/// and with no separators the result is just `input`.
fn multi_split<'input>(input: &'input str, seperators: &[String]) -> Vec<&'input str> {
    seperators.iter().fold(vec![input], |pieces, seperator| {
        let seperator = seperator.as_str();
        pieces
            .into_iter()
            .flat_map(|piece| piece.split(seperator))
            .collect()
    })
}
243
#[cfg(test)]
mod test {
    use super::*;
    use simd_json::BorrowedValue;
    use simd_json::borrowed::Object;

    /// Compiles `pattern`, runs it over `input` and asserts that the resulting
    /// object holds exactly the `expected` key/value pairs.
    ///
    /// `#[track_caller]` makes a failing assertion point at the calling test.
    #[track_caller]
    fn assert_pairs(pattern: &str, input: &str, expected: &[(&str, &str)]) {
        let kv = Pattern::compile(pattern).expect("Failed to build pattern");
        let r: BorrowedValue = kv.run(input).expect("Failed to split input");
        assert_eq!(
            r.as_object().map(Object::len).unwrap_or_default(),
            expected.len()
        );
        for &(key, value) in expected {
            assert_eq!(r[key], value);
        }
    }

    #[test]
    fn default() {
        let d = Pattern::default();
        let p = Pattern::compile("%{key}:%{val}").expect("compile");
        assert_eq!(d, p);
    }

    #[test]
    fn test_multisplit() {
        let seps = vec![String::from(" "), String::from(";")];
        let input = "this=is;a=test for:seperators";

        let i = multi_split(input, &seps);
        assert_eq!(i, vec!["this=is", "a=test", "for:seperators"]);
    }

    #[test]
    fn simple_split() {
        assert_pairs(
            "%{key}=%{val}",
            "this=is a=test",
            &[("this", "is"), ("a", "test")],
        );
    }

    #[test]
    fn simple_split2() {
        assert_pairs(
            "&%{key}=%{val}",
            "this=is&a=test",
            &[("this", "is"), ("a", "test")],
        );
    }

    #[test]
    fn newline_simple_() {
        assert_pairs(
            r"\n%{key}=%{val}",
            "this=is\na=test",
            &[("this", "is"), ("a", "test")],
        );
    }

    #[test]
    fn simple_split3() {
        assert_pairs("&", "this:is&a:test", &[("this", "is"), ("a", "test")]);
    }

    #[test]
    fn simple_split4() {
        assert_pairs(
            "%{key}%{%{val}",
            "this%{is a%{test",
            &[("this", "is"), ("a", "test")],
        );
    }

    #[test]
    fn simple_split5() {
        assert_pairs(
            "%{key}%{key}%{val}",
            "this%{key}is a%{key}test",
            &[("this", "is"), ("a", "test")],
        );
    }

    #[test]
    fn invalid_pattern() {
        let cases = [
            ("%{key} ", Error::InvalidPattern(6)),
            ("%{key} %{val} \\8", Error::InvalidEscape('8')),
            ("%{key} %{val} ", Error::DoubleSeperator(String::from(" "))),
            (
                "%{key}=%{val} %{key}==%{val}",
                Error::DoubleSeperator(String::from("=")),
            ),
            (
                "%{key}=%{val}; %{key}:%{val} %{key}:%{val}",
                Error::DoubleSeperator(String::from(" ")),
            ),
            (
                "%{key}=%{val};%{key}:%{val} :%{key}:%{val}",
                Error::DoubleSeperator(String::from(":")),
            ),
        ];
        for (pattern, expected) in cases {
            let e = Pattern::compile(pattern).expect_err("no error");
            assert_eq!(e, expected, "pattern: {pattern:?}");
            // Every error must render a message through `Display`.
            assert!(!e.to_string().is_empty());
        }
    }

    #[test]
    fn one_field() {
        assert_pairs("%{key}=%{val}", "this=is", &[("this", "is")]);
    }

    #[test]
    fn no_split() {
        let kv = Pattern::compile("%{key}=%{val}").expect("Failed to build pattern");
        let r: Option<BorrowedValue> = kv.run("this is a test");
        assert!(r.is_none());
    }

    #[test]
    fn different_seperators() {
        assert_pairs(
            "%{key}=%{val};%{key}:%{val} %{key}:%{val}",
            "this=is;a=test for:seperators",
            &[("this", "is"), ("a", "test"), ("for", "seperators")],
        );
    }

    #[test]
    fn different_seperators2() {
        assert_pairs(
            "%{key}=%{val}%{key}:%{val} %{key}:%{val};",
            "this=is;a=test for:seperators",
            &[("this", "is"), ("a", "test"), ("for", "seperators")],
        );
    }

    // NOTE(review): despite its name this test uses the same valid pattern and
    // input as `different_seperators`; kept so the set of test names is stable.
    #[test]
    fn invalid_pattern2() {
        assert_pairs(
            "%{key}=%{val};%{key}:%{val} %{key}:%{val}",
            "this=is;a=test for:seperators",
            &[("this", "is"), ("a", "test"), ("for", "seperators")],
        );
    }

    #[test]
    fn unfinished_escape_in_pattern() {
        let e = Pattern::compile(r"%{key}=%{val}; \\\r\n\t\").expect_err("no error");
        assert_eq!(e, Error::UnterminatedEscape);
        assert_eq!(
            e.to_string(),
            "Unterminated escape at the end of line or of a delimiter %{ can't be escaped"
        );
    }
}