1use nom::{
2 branch::alt,
3 bytes::complete::{escaped_transform, tag, take, take_while, take_while1},
4 character::complete::{crlf, digit1, none_of, not_line_ending, one_of, space0, space1},
5 combinator::{all_consuming, map, map_res, opt, recognize, value, verify},
6 error::ErrorKind,
7 multi::{many0, many_till, separated_nonempty_list},
8 sequence::{delimited, pair, preceded, terminated, tuple},
9 IResult,
10};
11
12fn w<'a, O, P>(p: P) -> impl Fn(&'a str) -> IResult<&'a str, O>
13where
14 P: Fn(&'a str) -> IResult<&'a str, O>,
15{
16 preceded(many0(white_space), p)
17}
18
/// Returns `true` when `c` may start an identifier: an underscore or any
/// character `char::is_alphabetic` accepts.
fn is_idalpha(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphabetic(),
    }
}
22
23fn is_idalphanum(c: char) -> bool {
24 is_idalpha(c) || c.is_numeric()
25}
26
27fn identifier(input: &str) -> IResult<&str, &str> {
28 w(recognize(pair(
29 take_while1(is_idalpha),
30 take_while(is_idalphanum),
31 )))(input)
32}
33
34fn hash_comment(input: &str) -> IResult<&str, ()> {
35 map(
36 tuple((
37 tag("#"),
38 take_while(|c| c != '\r' && c != '\n'),
39 tag("\r\n"),
40 )),
41 |_| (),
42 )(input)
43}
44
45fn non_newline(input: &str) -> IResult<&str, &str> {
46 recognize(none_of("\n\r"))(input)
47}
48
49fn bracket_comment(input: &str) -> IResult<&str, ()> {
50 value(
51 (),
52 tuple((tag("/*"), many_till(alt((non_newline, crlf)), tag("*/")))),
53 )(input)
54}
55
56fn comment(input: &str) -> IResult<&str, ()> {
57 alt((hash_comment, bracket_comment))(input)
58}
59
60fn multiline_literal(input: &str) -> IResult<&str, &str> {
61 terminated(
62 map(
63 opt(recognize(pair(
64 take_while1(|c| c != '.' && c != '\r' && c != '\n'),
65 take_while(|c| c != '\r' && c != '\n'),
66 ))),
67 |op| op.unwrap_or(""),
68 ),
69 crlf,
70 )(input)
71}
72
73fn multiline_dotstart(input: &str) -> IResult<&str, &str> {
74 delimited(
75 tag("."),
76 verify(not_line_ending, |s: &str| s.len() > 0),
77 crlf,
78 )(input)
79}
80
81fn multi_line(input: &str) -> IResult<&str, Vec<&str>> {
82 delimited(
83 tuple((
84 w(tag("text:")),
85 space0,
86 alt((hash_comment, value((), crlf))),
87 )),
88 many0(alt((multiline_literal, multiline_dotstart))),
89 tag(".\r\n"),
90 )(input)
91}
92
/// Size suffix that may follow a number.
#[derive(Debug, PartialEq)]
enum Quantifier {
    /// No suffix: the number is taken as-is.
    U,
    /// `K`: multiply by 1024.
    K,
    /// `M`: multiply by 1024 * 1024.
    M,
    /// `G`: multiply by 1024 * 1024 * 1024.
    G,
}

impl Quantifier {
    /// Returns the multiplier this suffix stands for.
    fn weight(&self) -> u64 {
        // Each step up the scale is another factor of 2^10.
        let shift = match *self {
            Quantifier::U => 0,
            Quantifier::K => 10,
            Quantifier::M => 20,
            Quantifier::G => 30,
        };
        1u64 << shift
    }
}
111
112fn quantifier(input: &str) -> IResult<&str, Quantifier> {
113 use Quantifier::*;
114 map(opt(one_of("KMG")), |c| match c {
115 None => U,
116 Some(c) => match c {
117 'K' => K,
118 'M' => M,
119 'G' => G,
120 _ => unreachable!(),
121 },
122 })(input)
123}
/// A suffix letter is consumed; empty input falls back to the unit quantifier.
#[test]
fn parse_quantifier() {
    let cases = vec![("K", Quantifier::K), ("", Quantifier::U)];
    for (source, expected) in cases {
        assert_eq!(quantifier(source), Ok(("", expected)));
    }
}
129
130fn number(input: &str) -> IResult<&str, u64> {
131 w(map_res(pair(digit1, quantifier), |(n, q)| {
132 n.parse::<u64>()
133 .map_err(|_| (input, ErrorKind::TooLarge))
134 .and_then(|n| {
135 n.checked_mul(q.weight())
136 .ok_or((input, ErrorKind::TooLarge))
137 })
138 }))(input)
139}
/// Numbers are scaled by their suffix and parsing stops at the first
/// character that is not part of the number.
#[test]
fn parse_number() {
    let cases: Vec<(&str, &str, u64)> = vec![
        ("1234K blah", " blah", 1234 * 1024),
        ("1234 foo", " foo", 1234),
    ];
    for (source, rest, value) in cases {
        assert_eq!(number(source), Ok((rest, value)));
    }
}
145
146fn tagged_id(input: &str) -> IResult<&str, &str> {
148 preceded(w(tag(":")), identifier)(input)
149}
150
151fn white_space(input: &str) -> IResult<&str, ()> {
152 alt((value((), comment), value((), crlf), value((), space1)))(input)
153 }
155
156fn quoted_string(input: &str) -> IResult<&str, String> {
157 let one: usize = 1;
158 delimited(
159 w(tag("\"")),
160 escaped_transform(none_of(r#"\""#), '\\', take(one)),
161 tag("\""),
162 )(input)
163}
164
165pub fn document(input: &str) -> IResult<&str, Document> {
168 all_consuming(delimited(
169 many0(white_space),
170 map(many0(command), |commands| Document { commands }),
171 many0(white_space),
172 ))(input)
173}
174
175fn command(input: &str) -> IResult<&str, Command> {
176 map(
177 tuple((
178 identifier,
179 argument_group,
180 alt((
181 value(vec![], w(tag(";"))),
182 delimited(w(tag("{")), many0(command), w(tag("}"))),
183 )),
184 )),
185 |(id, args, block)| Command { id, args, block },
186 )(input)
187}
188
189fn test_list(input: &str) -> IResult<&str, Vec<Test>> {
190 map(
191 opt(alt((
192 map(test, |t| vec![t]),
193 delimited(
194 w(tag("(")),
195 separated_nonempty_list(w(tag(",")), test),
196 w(tag(")")),
197 ),
198 ))),
199 |o| o.unwrap_or(vec![]),
200 )(input)
201}
202
203fn argument_group(input: &str) -> IResult<&str, ArgumentGroup> {
204 map(pair(many0(argument), test_list), |(args, tests)| {
205 ArgumentGroup { inner: args, tests }
206 })(input)
207}
208
209fn string_list(input: &str) -> IResult<&str, Vec<StringIsh>> {
210 alt((
211 map(stringish, |s| vec![s]),
212 delimited(
213 w(tag("[")),
214 separated_nonempty_list(w(tag(",")), stringish),
215 w(tag("]")),
216 ),
217 ))(input)
218}
219
220fn stringish(input: &str) -> IResult<&str, StringIsh> {
221 alt((
222 map(quoted_string, |s| StringIsh::Quoted(s)),
223 map(multi_line, |v| StringIsh::MultiLine(v)),
224 ))(input)
225}
226
227fn argument(input: &str) -> IResult<&str, Argument> {
228 alt((
229 map(string_list, |sl| Argument::Strings(sl)),
230 map(number, |n| Argument::Number(n)),
231 map(tagged_id, |id| Argument::Tag(id)),
232 ))(input)
233}
234
235fn test(input: &str) -> IResult<&str, Test> {
236 map(tuple((identifier, argument_group)), |(id, args)| Test {
237 id,
238 args,
239 })(input)
240}
241
242#[derive(Debug, Clone)]
243pub struct Document<'doc> {
244 pub commands: Vec<Command<'doc>>,
245}
246
247#[derive(Debug, Clone)]
248pub struct Command<'doc> {
249 pub id: &'doc str,
250 pub args: ArgumentGroup<'doc>,
251 pub block: Vec<Command<'doc>>,
252}
253
254#[derive(Debug, Clone)]
255pub struct ArgumentGroup<'doc> {
256 pub inner: Vec<Argument<'doc>>,
257 pub tests: Vec<Test<'doc>>,
258}
259
260#[derive(Debug, Clone)]
261pub enum Argument<'doc> {
262 Strings(Vec<StringIsh<'doc>>),
263 Number(u64),
264 Tag(&'doc str),
265}
266
/// A string value in either of the two forms the parser accepts.
#[derive(Debug, Clone, PartialEq)]
pub enum StringIsh<'doc> {
    /// A double-quoted string with its escapes already resolved.
    Quoted(String),
    /// The lines of a `text:` block, each stored without its CRLF terminator
    /// and without the escaping dot of dot-prefixed lines.
    MultiLine(Vec<&'doc str>),
}

impl<'doc> StringIsh<'doc> {
    /// Returns the string's contents as an owned `String`.
    ///
    /// For `MultiLine`, the parser removed the CRLF from every line, so it is
    /// put back after each line here. Plain concatenation would fuse the end
    /// of one line to the start of the next.
    pub fn to_string(&self) -> String {
        match self {
            Self::Quoted(s) => s.clone(),
            Self::MultiLine(lines) => {
                // Two extra bytes per line for the restored "\r\n".
                let capacity = lines.iter().map(|line| line.len() + 2).sum();
                let mut text = String::with_capacity(capacity);
                for line in lines {
                    text.push_str(line);
                    text.push_str("\r\n");
                }
                text
            }
        }
    }
}
281
282#[derive(Debug, Clone)]
283pub struct Test<'doc> {
284 pub id: &'doc str,
285 pub args: ArgumentGroup<'doc>,
286}
287
/// Escaped quotes and escaped backslashes are resolved inside a quoted string.
#[test]
fn parse_quoted_string() {
    let source = r#""asdf\"jk\\l""#;
    let expected = String::from(r#"asdf"jk\l"#);
    assert_eq!(quoted_string(source), Ok(("", expected)));
}
295
// Shared fixtures for the comment parser tests below.

/// Inputs `hash_comment` must accept.
#[cfg(test)]
const HASHES: &[&str] = &["# This is a #hash comment\r\n"];
/// Inputs `hash_comment` must reject.
#[cfg(test)]
const NON_HASHES: &[&str] = &["This is not\r. \r\n", " # Nor this.\r\n"];
/// Inputs `bracket_comment` must accept.
#[cfg(test)]
const BRACKETS: &[&str] = &[
    "/* This is a bracket comment*/",
    "/* And so /* \r\n is this */",
];
/// Inputs `bracket_comment` must reject.
#[cfg(test)]
const NON_BRACKETS: &[&str] = &["/* But \n this fails */"];
/// Identifiers may contain digits but must not start with one.
#[test]
fn parse_id() {
    let accepted = identifier("hello_there0");
    assert_eq!(accepted, Ok(("", "hello_there0")));

    let rejected = identifier("0hello_there0");
    assert!(rejected.is_err());
}
/// `hash_comment` accepts every `HASHES` fixture and rejects every
/// `NON_HASHES` fixture.
#[test]
fn parse_hash_comment() {
    assert!(HASHES.iter().all(|sample| hash_comment(sample).is_ok()));
    assert!(NON_HASHES.iter().all(|sample| hash_comment(sample).is_err()));
}
/// `bracket_comment` accepts every `BRACKETS` fixture and rejects every
/// `NON_BRACKETS` fixture.
#[test]
fn parse_bracket_comment() {
    assert!(BRACKETS.iter().all(|sample| bracket_comment(sample).is_ok()));
    assert!(NON_BRACKETS
        .iter()
        .all(|sample| bracket_comment(sample).is_err()));
}
/// `comment` accepts both comment styles and rejects everything that either
/// single-style parser rejects.
#[test]
fn parse_comment() {
    let accepted = BRACKETS.iter().chain(HASHES.iter());
    for sample in accepted {
        assert!(comment(sample).is_ok());
    }

    let rejected = NON_BRACKETS.iter().chain(NON_HASHES.iter());
    for sample in rejected {
        assert!(comment(sample).is_err());
    }
}
/// Plain lines are returned without their CRLF; a leading dot or a bare LF
/// inside the line is rejected.
#[test]
fn parse_multiline_literal() {
    let plain = multiline_literal("Hello, there!\r\n");
    assert_eq!(plain, Ok(("", "Hello, there!")));

    let rejected = [
        ".Dots are not allowed\r\n",
        "Neither are\ninternal newlines\r\n",
    ];
    for sample in rejected.iter() {
        assert!(multiline_literal(sample).is_err());
    }
}
/// Dot-prefixed lines lose their leading dot; a lone dot or a line without a
/// dot is rejected.
#[test]
fn parse_multiline_dotstart() {
    let dotted = multiline_dotstart(".Dots are OK here\r\n");
    assert_eq!(dotted, Ok(("", "Dots are OK here")));

    let rejected = [".\r\n", "No dot is bad\r\n"];
    for sample in rejected.iter() {
        assert!(multiline_dotstart(sample).is_err());
    }
}
/// A full `text:` block: a header with a trailing comment, a plain line, a
/// dot-escaped line, an escaped lone dot, and the terminating dot line.
#[test]
fn parse_multi_line() {
    let source = "text: \t #begin text\r\nThis is some multi-line text\r\n.With embedded dots\r\n..\r\n.\r\n";
    let expected = vec!["This is some multi-line text", "With embedded dots", "."];
    assert_eq!(multi_line(source), Ok(("", expected)));
}