de_regex/
lib.rs

1/*!
2
3# De-Regex
4
5This crate contains a library that deserializes a string into a struct based on a regular expression and serde.
6
7## Example: Parse image dimension into struct
8
9```rust
10# fn main() -> Result<(), de_regex::Error> {
11use serde::Deserialize;
12
13#[derive(Deserialize)]
14struct Dimension {
15    width: u32,
16    height: u32
17}
18
19let pattern = r"^(?P<width>\d+)x(?P<height>\d+)$";
20let input = "800x600";
21
22let dim: Dimension = de_regex::from_str(input, pattern)?;
23
24assert_eq!(dim.width, 800);
25assert_eq!(dim.height, 600);
26# Ok(())
27# }
28```
29## Supported data types
30
31The following data types can be used as struct fields.
32
33- **bool**: Supported values are `true` or `false` case insensitive<br>
34            Example pattern: `^(?P<group_name>(?i)(true|false))$`
35
36- **u8, u16, u32, u64**: Decimal values prefixed with an optional `+`<br>
37            Example pattern: `^(?P<group_name>\+?\d+)$`
38
39- **i8, i16, i32, i64**: Decimal values prefixed with an optional `+` or `-`<br>
40            Example pattern: `^(?P<group_name>[-+]?\d+)$`
41
42- **f32, f64**: See the documentation of the [FromStr](https://doc.rust-lang.org/std/primitive.f32.html#impl-FromStr) implementation of f32/f64 for the valid syntax<br>
43            Example pattern for simple decimal floats: `^(?P<group_name>[-+]?\d+(\.\d*)?)$`
44
45- **String**: A unicode (utf8) string value.<br>
46            Example pattern: `^(?P<group_name>\w*)$`
47
48- **Tuple struct**: A tuple struct with one field (New Type Idiom). The struct needs to implement `Deserialize`:
49    ```rust
50      # use serde::Deserialize;
51      #[derive(Deserialize)]
52      struct NewType(i32);
53    ```
54
55- **Enum**: Enums with unit variants (no newtype variants and no variants with fields). The enum needs to implement `Deserialize`:
56    ```rust
57      # use serde::Deserialize;
58      #[derive(Deserialize)]
59      enum SomeEnum {
60        Foo,
61        #[serde(rename = "bar")]
62        Bar,
63        Baz(i32) // <- Will compile but is not available in matching because of newtype
64      }
65    ```
66
67- **Option<>**: All types above can be used as an optional value
68
69Other data types supported by `serde` might work but are not officially supported and tested.
70
71### Words of wisdom
72
73If your regular expression looks like a behemoth no mere mortal will ever understand, please reconsider using this crate.
74*/
75
76/*
77 * IMPLEMENTATION NOTES
78 *
79 * The implementation is based on two implementations of serde::de::Deserializer
80 *
81 * 1. struct Deserializer:
82 *    The toplevel deserializer that implements the deserialization of "maps".
83 *    The regular expressions are matched against the input here and passed
84 *    into serde::de::value::MapDeserializer for further processing of the map/struct.
85 *
86 * 2. struct Value:
87 *    Responsible for deserializing struct members.
88 *    For most types parsing is based on std::str::FromStr
89 *
90 */
91
92mod error;
93mod de;
94
95pub use error::Error;
96
97use serde::Deserialize;
98use regex::Regex;
99
100/// Deserialize an input string into a struct.
101///
102/// # Example
103/// ```rust
104/// # fn main() -> Result<(), de_regex::Error> {
105/// use serde::Deserialize;
106///
107/// #[derive(Deserialize)]
108/// struct Dimension {
109///     width: u32,
110///     height: u32
111/// }
112///
113/// let pattern = r"^(?P<width>\d+)x(?P<height>\d+)$";
114/// let input = "800x600";
115///
116/// let dim: Dimension = de_regex::from_str(input, pattern)?;
117///
118/// assert_eq!(dim.width, 800);
119/// assert_eq!(dim.height, 600);
120/// # Ok(())
121/// # }
122/// ```
123pub fn from_str<'a, T>(input: &'a str, regex: &str) -> std::result::Result<T, Error> where T: Deserialize<'a> {
124    let regex = Regex::new(&regex).map_err(Error::BadRegex)?;
125    from_str_regex(input, regex)
126}
127
128/// Deserialize an input string into a struct.
129///
130/// # Example
131/// ```rust
132/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
133/// use serde::Deserialize;
134/// use regex::Regex;
135///
136/// #[derive(Deserialize)]
137/// struct Dimension {
138///     width: u32,
139///     height: u32
140/// }
141///
142/// let pattern = Regex::new(r"^(?P<width>\d+)x(?P<height>\d+)$")?;
143/// let input = "800x600";
144///
145/// let dim: Dimension = de_regex::from_str_regex(input, pattern)?;
146///
147/// assert_eq!(dim.width, 800);
148/// assert_eq!(dim.height, 600);
149/// # Ok(())
150/// # }
151/// ```
152pub fn from_str_regex<'a, T>(input: &'a str, regex: Regex) -> std::result::Result<T, Error> where T: Deserialize<'a> {
153    let mut deserializer = de::Deserializer::new(input, regex);
154    T::deserialize(&mut deserializer)
155}
156
/// Unit tests for the public entry points `from_str` / `from_str_regex`.
#[cfg(test)]
mod test {
    use super::*;
    use super::error::Result;

    /// Minimal two-field target used by the basic success and failure tests.
    #[derive(Deserialize, PartialEq, Debug)]
    struct Test {
        foo: u32,
        bar: i32,
    }

    #[test]
    fn test_simple() {
        let regex = r"^(?P<foo>\d*),(?P<bar>-?\d*)$";
        let input = "1,-2";
        let output: Test = from_str(input, regex).unwrap();

        assert_eq!(output, Test { foo: 1, bar: -2 });
    }

    #[test]
    fn test_bad_regex() {
        // The group name `foo` is deliberately left unterminated, so the pattern cannot compile.
        let regex = r"^(?P<foo\d*),(?P<bar>\d*)$";
        let input = "1,-2";
        let output: Result<Test> = from_str(input, regex);

        assert!(matches!(output, Err(Error::BadRegex(_))), "Expected Error::BadRegex got {:?}", output);
    }

    #[test]
    fn test_bad_input() {
        // The pattern is valid; the empty input lacks the mandatory `,` and therefore
        // cannot match. (A malformed pattern here would only repeat `test_bad_regex`.)
        let regex = r"^(?P<foo>\d*),(?P<bar>\d*)$";
        let input = "";
        let output: Result<Test> = from_str(input, regex);

        assert!(output.is_err());
    }

    #[test]
    fn test_missing_group() {
        // The pattern has no `bar` group, so the non-optional field `bar` cannot be filled.
        let regex = r"^(?P<foo>\d*)$";
        let input = "1";
        let output: Result<Test> = from_str(input, regex);

        assert!(output.is_err());
    }

    #[test]
    fn test_explicit_positive_int() {
        let regex = r"^(?P<foo>\+?\d*),(?P<bar>[-+]?\d*)$";
        let input = "+1,+2";
        let output: Test = from_str(input, regex).unwrap();

        assert_eq!(output, Test { foo: 1, bar: 2 });
    }

    /// One field for every primitive type listed as supported in the crate docs.
    #[derive(Deserialize, PartialEq, Debug)]
    struct Test2 {
        f_bool: bool,
        f_u8: u8,
        f_u16: u16,
        f_u32: u32,
        f_u64: u64,
        f_i8: i8,
        f_i16: i16,
        f_i32: i32,
        f_i64: i64,
        f_f32: f32,
        f_f64: f64,
        f_str: String,
    }

    /// Comma separated pattern with one named group per field of `Test2`.
    const TEST2_PATTERN: &str = r"^(?P<f_bool>\w*),(?P<f_u8>\d*),(?P<f_u16>\d*),(?P<f_u32>\d*),(?P<f_u64>\d*),(?P<f_i8>-?\d*),(?P<f_i16>-?\d*),(?P<f_i32>-?\d*),(?P<f_i64>-?\d*),(?P<f_f32>-?\d*\.?\d?),(?P<f_f64>-?\d*\.?\d?),(?P<f_str>\w*)$";

    #[test]
    fn test_supported_types() {
        let input = "true,1,2,3,4,-1,-2,-3,-4,1.0,-1.0,foobar";
        let output: Test2 = from_str(input, TEST2_PATTERN).unwrap();

        assert_eq!(output, Test2 {
            f_bool: true,
            f_u8: 1,
            f_u16: 2,
            f_u32: 3,
            f_u64: 4,
            f_i8: -1,
            f_i16: -2,
            f_i32: -3,
            f_i64: -4,
            f_f32: 1.0,
            f_f64: -1.0,
            f_str: "foobar".to_owned(),
        });
    }

    /// Same shape as `Test`, but with both fields optional.
    #[derive(Deserialize, PartialEq, Debug)]
    struct Test3 {
        foo: Option<u32>,
        bar: Option<i32>,
    }

    #[test]
    fn test_option() {
        let regex = r"^(?P<foo>\d*),(?P<bar>-?\d*)$";
        let input = "1,-2";
        let output: Test3 = from_str(input, regex).unwrap();

        assert_eq!(output, Test3 { foo: Some(1), bar: Some(-2) });
    }

    #[test]
    fn test_option_none() {
        // Both groups match the empty string, which is expected to become `None`.
        let regex = r"^(?P<foo>\d*),(?P<bar>-?\d*)$";
        let input = ",";
        let output: Test3 = from_str(input, regex).unwrap();

        assert_eq!(output, Test3 { foo: None, bar: None });
    }

    #[test]
    fn test_bool() {
        #[derive(Deserialize)]
        struct TestBool {
            v: bool,
        }

        // Case insensitive pattern: any capitalisation of true/false has to be accepted.
        let regex = r"^(?P<v>(?i)(true|false))$";

        assert!(from_str::<TestBool>("true", regex).unwrap().v);
        assert!(!from_str::<TestBool>("false", regex).unwrap().v);
        assert!(from_str::<TestBool>("TRUE", regex).unwrap().v);
        assert!(!from_str::<TestBool>("FALSE", regex).unwrap().v);
        assert!(from_str::<TestBool>("trUE", regex).unwrap().v);
        assert!(!from_str::<TestBool>("FAlse", regex).unwrap().v);

        // Permissive pattern: neither of these inputs may end up as a bool.
        let regex = r"^(?P<v>\w*)$";

        assert!(from_str::<TestBool>("SOMETHING ELSE", regex).is_err());
        assert!(from_str::<TestBool>("", regex).is_err());
    }

    #[test]
    fn test_uint() {
        #[derive(Deserialize)]
        struct TestUInt {
            v: u32,
        }

        let regex = r"^(?P<v>\+?\d+)$";

        assert_eq!(123, from_str::<TestUInt>("123", regex).unwrap().v);
        assert_eq!(123, from_str::<TestUInt>("+123", regex).unwrap().v);
        assert!(from_str::<TestUInt>("-123", regex).is_err());
    }

    #[test]
    fn test_int() {
        #[derive(Deserialize)]
        struct TestInt {
            v: i32,
        }

        let regex = r"^(?P<v>[-+]?\d+)$";

        assert_eq!(123, from_str::<TestInt>("123", regex).unwrap().v);
        assert_eq!(123, from_str::<TestInt>("+123", regex).unwrap().v);
        assert_eq!(-123, from_str::<TestInt>("-123", regex).unwrap().v);
        assert!(from_str::<TestInt>("#123", regex).is_err());
    }

    #[test]
    fn test_float() {
        #[derive(Deserialize)]
        struct TestFloat {
            v: f32,
        }

        let regex = r"^(?P<v>[-+]?\d+(\.\d*)?)$";

        assert_eq!(123.0, from_str::<TestFloat>("123", regex).unwrap().v);
        assert_eq!(1.23, from_str::<TestFloat>("1.23", regex).unwrap().v);
        assert_eq!(-123.0, from_str::<TestFloat>("-123", regex).unwrap().v);
        assert_eq!(-1.23, from_str::<TestFloat>("-1.23", regex).unwrap().v);

        assert!(from_str::<TestFloat>("#123", regex).is_err());
    }

    #[test]
    fn test_bad_value_error() {
        // `foo` matches `\w*` but "aaa1" is not a valid `u32`.
        let regex = r"^(?P<foo>\w*),(?P<bar>-?\d*)$";
        let input = "aaa1,-2";
        let output: Result<Test> = from_str(input, regex);

        assert!(matches!(output, Err(Error::BadValue{..})), "Expected Error::BadValue got {:?}", output);
    }

    #[test]
    fn test_newtype() {
        #[derive(Deserialize)]
        struct NewType(i32);

        #[derive(Deserialize)]
        struct Test {
            v: NewType,
        }

        let regex = r"^(?P<v>[-+]?\d+)$";

        assert_eq!(123, from_str::<Test>("123", regex).unwrap().v.0);
        assert_eq!(-123, from_str::<Test>("-123", regex).unwrap().v.0);
        assert!(from_str::<Test>("#123", regex).is_err());
    }

    #[test]
    fn test_enum_simple() {
        // `Baz` is never constructed: it is skipped by serde and only checked for rejection.
        #[allow(dead_code)]
        #[derive(Deserialize, Debug, PartialEq)]
        enum TestEnum {
            Foo,
            #[serde(rename = "bar")]
            Bar,
            #[serde(skip)]
            Baz(i32),
        }

        #[derive(Deserialize)]
        struct Test {
            v: TestEnum,
        }

        let regex = r"^(?P<v>[-+]?\w+)$";

        assert_eq!(TestEnum::Foo, from_str::<Test>("Foo", regex).unwrap().v);
        assert_eq!(TestEnum::Bar, from_str::<Test>("bar", regex).unwrap().v);
        // Variant names are matched case sensitively.
        assert!(from_str::<Test>("foo", regex).is_err());
        assert!(from_str::<Test>("Baz", regex).is_err());
    }
}