props_rs/
parser.rs

1//! A nom parser for Java properties files
2use nom::branch::alt;
3use nom::bytes::complete::{tag, take_till};
4use nom::combinator::{complete, eof, opt, value};
5
6use nom::character::complete::{none_of, one_of};
7use nom::multi::{many0, many1, many_till, separated_list0, separated_list1};
8
9use nom::IResult;
10
/// A single key-value pair parsed from a properties file.
///
/// Both fields hold the text after escape sequences and logical line
/// continuations have been resolved by the parser.
///
/// The type derives the full set of comparison and hashing traits so that
/// properties can be sorted, de-duplicated and stored in hashed or ordered
/// collections. Ordering compares `key` first and then `value` (field
/// declaration order).
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Property {
    /// The identifier of the property (the text before the separator).
    pub key: String,
    /// The value associated with `key` (the text after the separator).
    pub value: String,
}
19
20/// Consumes a sequence of spaces, tabs, and form feeds ("\f")
21fn consume_whitespaces(input: &[u8]) -> IResult<&[u8], ()> {
22    let (input, _) = many0(one_of(" \t\u{c}"))(input)?;
23    Ok((input, ()))
24}
25
26/// Consumes a single EOL of "\r\n", "\r" or "\n"
27fn consume_eol(input: &[u8]) -> IResult<&[u8], ()> {
28    let (input, _) = alt((complete(tag("\r\n")), tag("\r"), tag("\n")))(input)?;
29    Ok((input, ()))
30}
31
32/// Consumes an EOL or EOF
33fn consume_eol_or_eof(input: &[u8]) -> IResult<&[u8], ()> {
34    alt((value((), eof), consume_eol))(input)
35}
36
37/// Consumes a single blank line
38fn blank_line(input: &[u8]) -> IResult<&[u8], ()> {
39    let (input, _) = consume_whitespaces(input)?;
40    consume_eol_or_eof(input)
41}
42
43/// Consumes a line with a comment
44fn comment_line(input: &[u8]) -> IResult<&[u8], ()> {
45    let (input, _) = consume_whitespaces(input)?;
46    let (input, _) = one_of("#!")(input)?;
47    let (input, _) = take_till(eol)(input)?;
48    consume_eol_or_eof(input)
49}
50
/// Returns `true` when the byte is one of the end-of-line characters:
/// carriage return (`\r`) or line feed (`\n`).
fn eol(c: u8) -> bool {
    matches!(c, b'\r' | b'\n')
}
56
57/// Consumes a single line escape and any whitespaces after it
58fn consume_line(input: &[u8]) -> IResult<&[u8], ()> {
59    let (input, _) = tag(r"\")(input)?;
60    let (input, _) = consume_eol(input)?;
61    let (input, _) = consume_whitespaces(input)?;
62    Ok((input, ()))
63}
64
65/// Consumes a set of alternating lines and whiespaces. Stopping once there is no more alternating
66fn consume_whitespaces_and_lines(input: &[u8]) -> IResult<&[u8], ()> {
67    let (input, _) = separated_list0(many1(consume_line), consume_whitespaces)(input)?;
68    Ok((input, ()))
69}
70
71/// Consumes a character that exists in a key
72fn char_in_key(input: &[u8]) -> IResult<&[u8], char> {
73    none_of(":=\n\r \t\u{c}\\")(input)
74}
75
76/// Consumes a character which exists in a value
77fn char_in_value(input: &[u8]) -> IResult<&[u8], char> {
78    none_of("\n\r\\")(input)
79}
80
/// Maps the character that followed a backslash to the character it denotes,
/// e.g. `'t' -> '\t'`.
///
/// `t`, `n`, `f` and `r` become tab, line feed, form feed and carriage
/// return. Every other character (including the backslash itself) is
/// returned unchanged.
fn escaped_char_to_char(v: char) -> char {
    // (escape letter, control character it stands for)
    const CONTROL_ESCAPES: [(char, char); 4] =
        [('t', '\t'), ('n', '\n'), ('f', '\u{c}'), ('r', '\r')];

    CONTROL_ESCAPES
        .iter()
        .find(|pair| pair.0 == v)
        .map_or(v, |pair| pair.1)
}
92
93/// consumes an escaped character in a key or value
94fn escape_in_key_or_value(input: &[u8]) -> IResult<&[u8], char> {
95    let (input, _) = tag(r"\")(input)?;
96    let (input, c) = none_of("u\r\n")(input)?;
97    Ok((input, escaped_char_to_char(c)))
98}
99
100/// consumes a character in a key
101fn one_char_in_key(input: &[u8]) -> IResult<&[u8], char> {
102    alt((escape_in_key_or_value, char_in_key))(input)
103}
104
105/// consumes a character in a value
106fn one_char_in_value(input: &[u8]) -> IResult<&[u8], char> {
107    alt((escape_in_key_or_value, char_in_value))(input)
108}
109
110/// Consumes and returns a `String` representing the key to a property.
111fn consume_key(input: &[u8]) -> IResult<&[u8], String> {
112    // use many1(consume_line) because many0 always returns true and causes a separated list error.
113    let (input, chars) = separated_list1(many1(consume_line), many1(one_char_in_key))(input)?;
114    Ok((input, chars.into_iter().flatten().collect::<String>()))
115}
116
117/// Consumes and returns a `String` representing the value of a property.
118fn consume_value(input: &[u8]) -> IResult<&[u8], String> {
119    // use many1(consume_line) because many0 always returns true and causes a separated list error.
120    let (input, chars) = separated_list0(many1(consume_line), many0(one_char_in_value))(input)?;
121    Ok((input, chars.into_iter().flatten().collect::<String>()))
122}
123
124/// Consumes an entire line (or set of lines) representing a key-value property
125fn kv_line(input: &[u8]) -> IResult<&[u8], Property> {
126    let (input, _) = consume_whitespaces_and_lines(input)?;
127    let (input, key) = consume_key(input)?;
128    let (input, _) = consume_whitespaces_and_lines(input)?;
129    let (input, _) = opt(complete(one_of(":=")))(input)?;
130    let (input, _) = consume_whitespaces_and_lines(input)?;
131    let (input, value) = consume_value(input)?;
132    let (input, _) = consume_eol_or_eof(input)?;
133    Ok((input, Property { key, value }))
134}
135
136type ParsedProps<'a> = (Vec<Option<Property>>, &'a [u8]);
137
138/// The full parser which consumes comments, blanks, and Property lines.
139fn _fparser(input: &[u8]) -> IResult<&[u8], ParsedProps> {
140    many_till(
141        alt((
142            value(None, complete(comment_line)),
143            value(None, complete(blank_line)),
144            opt(complete(kv_line)),
145        )),
146        eof,
147    )(input)
148}
149
150/// Public parser function
151pub fn parser(input: &[u8]) -> IResult<&[u8], Vec<Property>> {
152    let (input, props) = _fparser(input)?;
153    let v = props.0.into_iter().flatten().collect();
154    Ok((input, v))
155}
156
#[cfg(test)]
mod test {
    use super::*;
    use nom::error::dbg_dmp;

    /// Asserts that a parser consumed the whole input and produced `$v`.
    macro_rules! assert_done {
        ($t:expr, $v:expr) => {
            assert_eq!($t, Ok((&b""[..], $v)))
        };
    }

    /// Asserts that a parser produced `$v` and left `$s` unconsumed.
    macro_rules! assert_done_partial {
        ($t:expr, $v:expr, $s:tt) => {
            assert_eq!($t, Ok((&$s[..], $v)))
        };
    }

    /// Asserts that a parser returned an error of any kind.
    macro_rules! assert_fails {
        ($t:expr) => {
            let r = $t;
            assert!(r.is_err(), "Expected a parse error, got {:?}", r);
        };
    }

    #[test]
    fn test_key() {
        // simple test
        assert_done!(consume_key(b"hello"), String::from("hello"));

        // A space ends the key
        assert_done_partial!(
            consume_key(b"hello world"),
            String::from("hello"),
            b" world"
        );

        // A colon ends the key
        assert_done_partial!(
            consume_key(b"hello:world"),
            String::from("hello"),
            b":world"
        );

        // An equal sign ends the key
        assert_done_partial!(
            consume_key(b"hello=world"),
            String::from("hello"),
            b"=world"
        );

        // An eol ends the key
        assert_done_partial!(
            consume_key(b"hello\nworld"),
            String::from("hello"),
            b"\nworld"
        );
        assert_done_partial!(
            consume_key(b"hello\rworld"),
            String::from("hello"),
            b"\rworld"
        );

        // These characters are valid
        assert_done!(
            consume_key(b"@#$%^&*()_+-`~?/.>,<|][{};\""),
            String::from("@#$%^&*()_+-`~?/.>,<|][{};\"")
        );

        // Spaces can be escaped
        assert_done!(
            consume_key(br"key\ with\ spaces"),
            String::from("key with spaces")
        );

        // Colons can be escaped
        assert_done!(
            consume_key(br"key\:with\:colons"),
            String::from("key:with:colons")
        );

        // Equals can be escaped
        assert_done!(
            consume_key(br"key\=with\=equals"),
            String::from("key=with=equals")
        );

        // Special characters can be escaped
        assert_done!(
            consume_key(br"now\nwith\rsome\fspecial\tcharacters\\"),
            String::from("now\nwith\rsome\u{c}special\tcharacters\\")
        );

        // Escapes on non escapable characters are ignored
        assert_done!(
            consume_key(br"w\iths\omeran\domch\arse\sca\pe\d"),
            String::from("withsomerandomcharsescaped")
        );

        // No input is not a key
        assert_fails!(consume_key(b""));

        // With logical line splits
        assert_done!(
            dbg_dmp(consume_key, "ell")(b"abc\\\n   def"),
            String::from("abcdef")
        );
        assert_done!(
            dbg_dmp(consume_key, "ell")(b"gh\\\n    \\\r    \\\r\nij\\\n\t kl"),
            String::from("ghijkl")
        );
    }

    /// Expected behaviour once Unicode handling exists; ignored until then so
    /// the gap stays visible in test output instead of hiding as dead code.
    #[test]
    #[ignore = "utf-8 not yet implemented"]
    fn test_utf8_keys() {
        // Unicode escapes
        assert_done!(
            consume_key(br"\u0048\u0065\u006c\u006c\u006f"),
            String::from("Hello")
        );

        // A byte above 127 is interpreted as a latin-1 extended character with
        // the same Unicode code point value.
        assert_done!(consume_key(&[0xA9]), String::from("\u{a9}"));

        // An \u escape must be followed by 4 hex digits.
        assert_done_partial!(
            consume_key(br"abc\uhello"),
            String::from("abc"),
            br"\uhello"
        );
    }

    #[test]
    fn test_value() {
        // basic case
        assert_done!(consume_value(b"hello"), String::from("hello"));

        // colons and equal signs are valid
        assert_done!(consume_value(b"h:l=o"), String::from("h:l=o"));

        // spaces are valid, even at the end
        assert_done!(
            consume_value(b"hello world  "),
            String::from("hello world  ")
        );

        // These are valid characters
        assert_done!(
            consume_value(b"/~`!@#$%^&*()-_=+[{]};:'\",<.>/?|"),
            String::from("/~`!@#$%^&*()-_=+[{]};:'\",<.>/?|")
        );

        // An eol ends the value
        assert_done_partial!(
            consume_value(b"hello\nworld"),
            String::from("hello"),
            b"\nworld"
        );
        assert_done_partial!(
            consume_value(b"hello\rworld"),
            String::from("hello"),
            b"\rworld"
        );

        // Special characters can be escaped
        assert_done!(
            consume_value(br"now\nwith\rsome\fspecial\tcharacters\\"),
            String::from("now\nwith\rsome\u{c}special\tcharacters\\")
        );

        // Escapes on non escapable characters are ignored
        assert_done!(
            consume_value(br"w\iths\omeran\domch\arse\sca\pe\d"),
            String::from("withsomerandomcharsescaped")
        );

        // No input is a valid value
        assert_done!(consume_value(b""), String::from(""));

        // With logical line splits
        assert_done!(consume_value(b"abc\\\n   def"), String::from("abcdef"));
        assert_done!(
            consume_value(b"gh\\\n    \\\r    \\\r\nij\\\n\t kl"),
            String::from("ghijkl")
        );
    }

    /// Expected behaviour once Unicode handling exists; ignored until then so
    /// the gap stays visible in test output instead of hiding as dead code.
    #[test]
    #[ignore = "utf-8 not yet implemented"]
    fn test_utf8_values() {
        // Unicode escapes
        assert_done!(
            consume_value(br"\u0048\u0065\u006c\u006c\u006f"),
            String::from("Hello")
        );

        // A byte above 127 is interpreted as a latin-1 extended character with
        // the same Unicode code point value.
        assert_done!(consume_value(&[0xA9]), String::from("\u{a9}"));

        // An \u escape must be followed by 4 hex digits.
        assert_done_partial!(
            consume_value(br"abc\uhello"),
            String::from("abc"),
            br"\uhello"
        );
    }

    #[test]
    fn test_kv_line() {
        let parsed = kv_line(b"key=value");
        assert_eq!(
            parsed.unwrap().1,
            Property {
                key: String::from("key"),
                value: String::from("value")
            }
        );
    }

    #[test]
    fn test_full_parse_simple() {
        let prop = b"key.1=value1\nkey.two=value2\n\n";
        let parsed = _fparser(prop);
        let props = parsed.unwrap().1;
        // Two property lines plus one blank line.
        assert_eq!(3, props.0.len());
        let props: Vec<Property> = props.0.into_iter().flatten().collect();
        assert_eq!(2, props.len());
        assert_eq!(props[0].key, "key.1");
        assert_eq!(props[0].value, "value1");
        assert_eq!(props[1].key, "key.two");
        assert_eq!(props[1].value, "value2")
    }

    #[test]
    fn test_pub_parser() {
        let prop = b"key.1=value1\nkey.two=value2\n\n";
        let parsed = parser(prop);
        let props = parsed.unwrap().1;
        assert_eq!(2, props.len());
        assert_eq!(props[0].key, "key.1");
        assert_eq!(props[0].value, "value1");
        assert_eq!(props[1].key, "key.two");
        assert_eq!(props[1].value, "value2")
    }

    #[test]
    fn test_pub_parser_skips_comments_and_blank_lines() {
        let prop = b"# a comment\n! another comment\n\nkey = value\n";
        let (rest, props) = parser(prop).unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(
            props,
            vec![Property {
                key: String::from("key"),
                value: String::from("value")
            }]
        );
    }
}