libsieve/
parse.rs

1use nom::{
2    branch::alt,
3    bytes::complete::{escaped_transform, tag, take, take_while, take_while1},
4    character::complete::{crlf, digit1, none_of, not_line_ending, one_of, space0, space1},
5    combinator::{all_consuming, map, map_res, opt, recognize, value, verify},
6    error::ErrorKind,
7    multi::{many0, many_till, separated_nonempty_list},
8    sequence::{delimited, pair, preceded, terminated, tuple},
9    IResult,
10};
11
12fn w<'a, O, P>(p: P) -> impl Fn(&'a str) -> IResult<&'a str, O>
13where
14    P: Fn(&'a str) -> IResult<&'a str, O>,
15{
16    preceded(many0(white_space), p)
17}
18
/// Returns `true` when `c` may start an identifier: an ASCII letter or `_`.
///
/// RFC 5228 defines identifiers over ABNF `ALPHA`, which is ASCII only, so
/// non-ASCII letters (for example `é`) are deliberately rejected.
fn is_idalpha(c: char) -> bool {
    c.is_ascii_alphabetic() || c == '_'
}
22
/// Returns `true` when `c` may appear after the first character of an
/// identifier: an ASCII letter, an ASCII digit or `_`.
///
/// RFC 5228 defines identifiers over ABNF `ALPHA` / `DIGIT`, so Unicode
/// letters and numerics (for example `²` or `٣`) are deliberately rejected.
fn is_idalphanum(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_'
}
26
27fn identifier(input: &str) -> IResult<&str, &str> {
28    w(recognize(pair(
29        take_while1(is_idalpha),
30        take_while(is_idalphanum),
31    )))(input)
32}
33
34fn hash_comment(input: &str) -> IResult<&str, ()> {
35    map(
36        tuple((
37            tag("#"),
38            take_while(|c| c != '\r' && c != '\n'),
39            tag("\r\n"),
40        )),
41        |_| (),
42    )(input)
43}
44
45fn non_newline(input: &str) -> IResult<&str, &str> {
46    recognize(none_of("\n\r"))(input)
47}
48
49fn bracket_comment(input: &str) -> IResult<&str, ()> {
50    value(
51        (),
52        tuple((tag("/*"), many_till(alt((non_newline, crlf)), tag("*/")))),
53    )(input)
54}
55
56fn comment(input: &str) -> IResult<&str, ()> {
57    alt((hash_comment, bracket_comment))(input)
58}
59
60fn multiline_literal(input: &str) -> IResult<&str, &str> {
61    terminated(
62        map(
63            opt(recognize(pair(
64                take_while1(|c| c != '.' && c != '\r' && c != '\n'),
65                take_while(|c| c != '\r' && c != '\n'),
66            ))),
67            |op| op.unwrap_or(""),
68        ),
69        crlf,
70    )(input)
71}
72
73fn multiline_dotstart(input: &str) -> IResult<&str, &str> {
74    delimited(
75        tag("."),
76        verify(not_line_ending, |s: &str| s.len() > 0),
77        crlf,
78    )(input)
79}
80
81fn multi_line(input: &str) -> IResult<&str, Vec<&str>> {
82    delimited(
83        tuple((
84            w(tag("text:")),
85            space0,
86            alt((hash_comment, value((), crlf))),
87        )),
88        many0(alt((multiline_literal, multiline_dotstart))),
89        tag(".\r\n"),
90    )(input)
91}
92
/// Size suffix that may follow a number.
#[derive(Debug, PartialEq)]
enum Quantifier {
    /// No suffix: the number is taken as-is.
    U,
    /// `K`: multiply by 2^10.
    K,
    /// `M`: multiply by 2^20.
    M,
    /// `G`: multiply by 2^30.
    G,
}

impl Quantifier {
    /// Returns the multiplier this suffix applies to a number.
    fn weight(&self) -> u64 {
        // Every step up is another factor of 1024, i.e. ten more bits.
        let shift = match self {
            Quantifier::U => 0,
            Quantifier::K => 10,
            Quantifier::M => 20,
            Quantifier::G => 30,
        };
        1u64 << shift
    }
}
111
112fn quantifier(input: &str) -> IResult<&str, Quantifier> {
113    use Quantifier::*;
114    map(opt(one_of("KMG")), |c| match c {
115        None => U,
116        Some(c) => match c {
117            'K' => K,
118            'M' => M,
119            'G' => G,
120            _ => unreachable!(),
121        },
122    })(input)
123}
#[test]
fn parse_quantifier() {
    // (input, expected quantifier); both inputs are consumed completely.
    let cases = vec![("K", Quantifier::K), ("", Quantifier::U)];
    for (text, expected) in cases {
        assert_eq!(quantifier(text), Ok(("", expected)));
    }
}
129
130fn number(input: &str) -> IResult<&str, u64> {
131    w(map_res(pair(digit1, quantifier), |(n, q)| {
132        n.parse::<u64>()
133            .map_err(|_| (input, ErrorKind::TooLarge))
134            .and_then(|n| {
135                n.checked_mul(q.weight())
136                    .ok_or((input, ErrorKind::TooLarge))
137            })
138    }))(input)
139}
#[test]
fn parse_number() {
    let with_suffix = number("1234K blah");
    assert_eq!(with_suffix, Ok((" blah", 1234 * 1024)));

    let without_suffix = number("1234 foo");
    assert_eq!(without_suffix, Ok((" foo", 1234)));
}
145
146// Called "tag" in RFC5228
147fn tagged_id(input: &str) -> IResult<&str, &str> {
148    preceded(w(tag(":")), identifier)(input)
149}
150
151fn white_space(input: &str) -> IResult<&str, ()> {
152    alt((value((), comment), value((), crlf), value((), space1)))(input)
153    //    alt((comment, value((), many1(alt((crlf, space1))))))(input)
154}
155
156fn quoted_string(input: &str) -> IResult<&str, String> {
157    let one: usize = 1;
158    delimited(
159        w(tag("\"")),
160        escaped_transform(none_of(r#"\""#), '\\', take(one)),
161        tag("\""),
162    )(input)
163}
164
165// PARSING BEGINS HERE
166
167pub fn document(input: &str) -> IResult<&str, Document> {
168    all_consuming(delimited(
169        many0(white_space),
170        map(many0(command), |commands| Document { commands }),
171        many0(white_space),
172    ))(input)
173}
174
175fn command(input: &str) -> IResult<&str, Command> {
176    map(
177        tuple((
178            identifier,
179            argument_group,
180            alt((
181                value(vec![], w(tag(";"))),
182                delimited(w(tag("{")), many0(command), w(tag("}"))),
183            )),
184        )),
185        |(id, args, block)| Command { id, args, block },
186    )(input)
187}
188
189fn test_list(input: &str) -> IResult<&str, Vec<Test>> {
190    map(
191        opt(alt((
192            map(test, |t| vec![t]),
193            delimited(
194                w(tag("(")),
195                separated_nonempty_list(w(tag(",")), test),
196                w(tag(")")),
197            ),
198        ))),
199        |o| o.unwrap_or(vec![]),
200    )(input)
201}
202
203fn argument_group(input: &str) -> IResult<&str, ArgumentGroup> {
204    map(pair(many0(argument), test_list), |(args, tests)| {
205        ArgumentGroup { inner: args, tests }
206    })(input)
207}
208
209fn string_list(input: &str) -> IResult<&str, Vec<StringIsh>> {
210    alt((
211        map(stringish, |s| vec![s]),
212        delimited(
213            w(tag("[")),
214            separated_nonempty_list(w(tag(",")), stringish),
215            w(tag("]")),
216        ),
217    ))(input)
218}
219
220fn stringish(input: &str) -> IResult<&str, StringIsh> {
221    alt((
222        map(quoted_string, |s| StringIsh::Quoted(s)),
223        map(multi_line, |v| StringIsh::MultiLine(v)),
224    ))(input)
225}
226
227fn argument(input: &str) -> IResult<&str, Argument> {
228    alt((
229        map(string_list, |sl| Argument::Strings(sl)),
230        map(number, |n| Argument::Number(n)),
231        map(tagged_id, |id| Argument::Tag(id)),
232    ))(input)
233}
234
235fn test(input: &str) -> IResult<&str, Test> {
236    map(tuple((identifier, argument_group)), |(id, args)| Test {
237        id,
238        args,
239    })(input)
240}
241
242#[derive(Debug, Clone)]
243pub struct Document<'doc> {
244    pub commands: Vec<Command<'doc>>,
245}
246
247#[derive(Debug, Clone)]
248pub struct Command<'doc> {
249    pub id: &'doc str,
250    pub args: ArgumentGroup<'doc>,
251    pub block: Vec<Command<'doc>>,
252}
253
254#[derive(Debug, Clone)]
255pub struct ArgumentGroup<'doc> {
256    pub inner: Vec<Argument<'doc>>,
257    pub tests: Vec<Test<'doc>>,
258}
259
260#[derive(Debug, Clone)]
261pub enum Argument<'doc> {
262    Strings(Vec<StringIsh<'doc>>),
263    Number(u64),
264    Tag(&'doc str),
265}
266
/// A string value in either of its two source forms.
#[derive(Debug, Clone, PartialEq)]
pub enum StringIsh<'doc> {
    /// A quoted string with escapes already resolved (hence owned).
    Quoted(String),
    /// A `text:` multi-line string: one entry per line, each without its
    /// terminating CRLF.
    MultiLine(Vec<&'doc str>),
}

impl<'doc> StringIsh<'doc> {
    /// Returns the string's content as an owned `String`.
    ///
    /// For `MultiLine`, every line is written back followed by the `"\r\n"`
    /// that the parser stripped, so line boundaries are preserved instead of
    /// the lines being fused together.
    pub fn to_string(&self) -> String {
        match self {
            Self::Quoted(text) => text.clone(),
            Self::MultiLine(lines) => {
                // Each line contributes its own bytes plus a two-byte CRLF.
                let capacity: usize = lines.iter().map(|line| line.len() + 2).sum();
                let mut text = String::with_capacity(capacity);
                for line in lines {
                    text.push_str(line);
                    text.push_str("\r\n");
                }
                text
            }
        }
    }
}
281
282#[derive(Debug, Clone)]
283pub struct Test<'doc> {
284    pub id: &'doc str,
285    pub args: ArgumentGroup<'doc>,
286}
287
#[test]
fn parse_quoted_string() {
    // Escaped quote and escaped backslash must both be unescaped.
    let source = r#""asdf\"jk\\l""#;
    let expected = String::from(r#"asdf"jk\l"#);
    assert_eq!(quoted_string(source), Ok(("", expected)));
}
295
// Test fixtures shared by the comment-parsing tests below.

/// Inputs that are valid hash comments.
#[cfg(test)]
const HASHES: &[&str] = &["# This is a #hash comment\r\n"];
/// Inputs that must not parse as hash comments.
#[cfg(test)]
const NON_HASHES: &[&str] = &["This is not\r. \r\n", " # Nor this.\r\n"];
/// Inputs that are valid bracket comments.
#[cfg(test)]
const BRACKETS: &[&str] = &[
    "/* This is a bracket comment*/",
    "/* And so /* \r\n is this */",
];
/// Inputs that must not parse as bracket comments.
#[cfg(test)]
const NON_BRACKETS: &[&str] = &["/* But \n this fails */"];
#[test]
fn parse_id() {
    let accepted = identifier("hello_there0");
    assert_eq!(accepted, Ok(("", "hello_there0")));

    // An identifier may not start with a digit.
    let rejected = identifier("0hello_there0");
    assert!(rejected.is_err());
}
#[test]
fn parse_hash_comment() {
    assert!(HASHES.iter().all(|s| hash_comment(s).is_ok()));
    assert!(NON_HASHES.iter().all(|s| hash_comment(s).is_err()));
}
#[test]
fn parse_bracket_comment() {
    assert!(BRACKETS.iter().all(|s| bracket_comment(s).is_ok()));
    assert!(NON_BRACKETS.iter().all(|s| bracket_comment(s).is_err()));
}
#[test]
fn parse_comment() {
    // Both comment kinds must be accepted by the combined parser...
    for s in BRACKETS.iter().chain(HASHES.iter()) {
        assert!(comment(s).is_ok());
    }
    // ...and inputs that are invalid for either kind must be rejected.
    for s in NON_BRACKETS.iter().chain(NON_HASHES.iter()) {
        assert!(comment(s).is_err());
    }
}
#[test]
fn parse_multiline_literal() {
    let plain = multiline_literal("Hello, there!\r\n");
    assert_eq!(plain, Ok(("", "Hello, there!")));

    let rejected = [
        ".Dots are not allowed\r\n",
        "Neither are\ninternal newlines\r\n",
    ];
    for line in rejected.iter() {
        assert!(multiline_literal(line).is_err());
    }
}
#[test]
fn parse_multiline_dotstart() {
    let dotted = multiline_dotstart(".Dots are OK here\r\n");
    assert_eq!(dotted, Ok(("", "Dots are OK here")));

    // A lone dot and a line without a leading dot are both rejected.
    for line in [".\r\n", "No dot is bad\r\n"].iter() {
        assert!(multiline_dotstart(line).is_err());
    }
}
#[test]
fn parse_multi_line() {
    // Header with trailing blanks and a hash comment, three content lines
    // (one plain, two dot-stuffed), then the terminating lone dot.
    let source = concat!(
        "text: \t #begin text\r\n",
        "This is some multi-line text\r\n",
        ".With embedded dots\r\n",
        "..\r\n",
        ".\r\n",
    );
    let expected = vec!["This is some multi-line text", "With embedded dots", "."];
    assert_eq!(multi_line(source), Ok(("", expected)));
}