quack_builder/
parser.rs

1use std::convert::TryInto;
2use std::str;
3use std::{convert::TryFrom, path::PathBuf};
4
5use crate::types::{
6    Enumerator, Extend, Extensions, Field, FieldType, FileDescriptor, Frequency, Message, OneOf,
7    Proto2Frequency, Proto3Frequency, RpcFunctionDeclaration, RpcService, Syntax,
8};
9
10use nom::Parser;
11use nom::{
12    IResult,
13    branch::alt,
14    bytes::complete::{tag, take_until},
15    character::complete::{
16        alpha1, alphanumeric1, anychar, digit1, hex_digit1, multispace1, not_line_ending,
17    },
18    combinator::{map, map_res, opt, recognize, value, verify},
19    multi::{many0, many1, separated_list0, separated_list1},
20    sequence::{delimited, pair, preceded, separated_pair, terminated},
21};
22
/// Field label keyword exactly as written in the source, before it is
/// resolved into a syntax-specific [`Frequency`].
#[derive(Clone, Debug)]
enum ParsingStageFrequencyToken {
    /// The `optional` keyword.
    Optional,
    /// The `repeated` keyword.
    Repeated,
    /// The `required` keyword.
    Required,
}
29
30#[derive(Debug, Clone)]
31#[allow(clippy::large_enum_variant)]
32enum MessageEvent {
33    Message(Message),
34    Enumerator(Enumerator),
35    Field(Field),
36    ReservedNums(Vec<i32>),
37    ReservedNames(Vec<String>),
38    OneOf(OneOf),
39    Extensions(Extensions),
40    Ignore,
41}
42
/// One item recognised inside the braces of an `enum` definition.
#[derive(Clone, Debug)]
enum EnumEvent {
    /// An enum value as a `(name, number)` pair.
    Field((String, i32)),
    /// An `option` statement, whitespace or a comment; dropped when the
    /// enum is assembled.
    Ignore,
}
48
49#[derive(Debug, Clone)]
50enum Event {
51    Syntax(Syntax),
52    Import(PathBuf),
53    Package(String),
54    Message(Message),
55    Enum(Enumerator),
56    RpcService(RpcService),
57    Extend(Extend),
58    Ignore,
59}
60
61fn qualifiable_name(input: &str) -> IResult<&str, String> {
62    map(
63        verify(
64            recognize(pair(opt(tag(".")), separated_list1(tag("."), word))),
65            |s: &str| !s.ends_with('.') && !s.contains(".."),
66        ),
67        std::borrow::ToOwned::to_owned,
68    )
69    .parse(input)
70}
71
72fn word_ref(input: &str) -> IResult<&str, &str> {
73    recognize(pair(
74        alt((
75            // I would really rather just take in 1 alphabetic
76            // character, but just using `alpha1()` is also technically
77            // correct for our use case and is simpler to implement in
78            // nom apparently
79            alpha1,
80            tag("_"),
81        )),
82        many0(alt((alphanumeric1, tag("_")))),
83    ))
84    .parse(input)
85}
86
87fn word(input: &str) -> IResult<&str, String> {
88    map(word_ref, |word| word.to_owned()).parse(input)
89}
90
91fn hex_integer(input: &str) -> IResult<&str, i32> {
92    preceded(
93        tag("0x"),
94        map_res(hex_digit1, |s: &str| i32::from_str_radix(s, 16)),
95    )
96    .parse(input)
97}
98
99fn integer(input: &str) -> IResult<&str, i32> {
100    map_res(digit1, |s: &str| s.parse()).parse(input)
101}
102
103fn comment(input: &str) -> IResult<&str, ()> {
104    value((), pair(tag("//"), not_line_ending)).parse(input)
105}
106
107fn block_comment(input: &str) -> IResult<&str, ()> {
108    value((), (tag("/*"), take_until("*/"), tag("*/"))).parse(input)
109}
110
111fn string(input: &str) -> IResult<&str, String> {
112    map(
113        delimited(tag("\""), take_until("\""), tag("\"")),
114        |s: &str| s.to_owned(),
115    )
116    .parse(input)
117}
118
119// word break: multispace or comment
120fn br(input: &str) -> IResult<&str, ()> {
121    value(
122        (),
123        many1(alt((value((), multispace1), comment, block_comment))),
124    )
125    .parse(input)
126}
127
128fn syntax(input: &str) -> IResult<&str, Syntax> {
129    delimited(
130        (tag("syntax"), many0(br), tag("="), many0(br)),
131        alt((
132            value(Syntax::Proto2, tag("\"proto2\"")),
133            value(Syntax::Proto3, tag("\"proto3\"")),
134        )),
135        pair(many0(br), tag(";")),
136    )
137    .parse(input)
138}
139
140fn import(input: &str) -> IResult<&str, PathBuf> {
141    delimited(
142        pair(tag("import"), many1(br)),
143        map(string, PathBuf::from),
144        pair(many0(br), tag(";")),
145    )
146    .parse(input)
147}
148
149fn package(input: &str) -> IResult<&str, String> {
150    delimited(
151        pair(tag("package"), many1(br)),
152        qualifiable_name,
153        pair(many0(br), tag(";")),
154    )
155    .parse(input)
156}
157
158fn extensions(input: &str) -> IResult<&str, Extensions> {
159    map(
160        delimited(
161            pair(tag("extensions"), many1(br)),
162            pair(
163                integer,
164                preceded(pair(many0(br), pair(tag("to"), many1(br))), take_until(";")),
165            ),
166            tag(";"),
167        ),
168        |(from, to)| {
169            // TODO: is there a better way to parse "max" or a number?
170            let s = to.trim();
171            let to = if s == "max" {
172                Extensions::max()
173            } else {
174                s.parse().unwrap()
175            };
176            Extensions { from, to }
177        },
178    )
179    .parse(input)
180}
181
182fn num_range(input: &str) -> IResult<&str, Vec<i32>> {
183    map(
184        separated_pair(integer, (many1(br), tag("to"), many1(br)), integer),
185        |(from_, to)| (from_..=to).collect(),
186    )
187    .parse(input)
188}
189
190fn reserved_nums(input: &str) -> IResult<&str, Vec<i32>> {
191    map(
192        delimited(
193            pair(tag("reserved"), many1(br)),
194            separated_list1(
195                (many0(br), tag(","), many0(br)),
196                alt((num_range, map(integer, |i| vec![i]))),
197            ),
198            pair(many0(br), tag(";")),
199        ),
200        |nums| nums.into_iter().flat_map(|v| v.into_iter()).collect(),
201    )
202    .parse(input)
203}
204
205fn reserved_names(input: &str) -> IResult<&str, Vec<String>> {
206    delimited(
207        pair(tag("reserved"), many1(br)),
208        separated_list1((many0(br), tag(","), many0(br)), string),
209        pair(many0(br), tag(";")),
210    )
211    .parse(input)
212}
213
214fn key_val(input: &str) -> IResult<&str, (&str, &str)> {
215    delimited(
216        pair(tag("["), many0(br)),
217        separated_pair(
218            word_ref,
219            delimited(many0(br), tag("="), many0(br)),
220            map(take_until("]"), |v: &str| v.trim()),
221        ),
222        tag("]"),
223    )
224    .parse(input)
225}
226
227fn frequency(input: &str) -> IResult<&str, ParsingStageFrequencyToken> {
228    alt((
229        value(ParsingStageFrequencyToken::Optional, tag("optional")),
230        value(ParsingStageFrequencyToken::Repeated, tag("repeated")),
231        value(ParsingStageFrequencyToken::Required, tag("required")),
232    ))
233    .parse(input)
234}
235
236fn field_type(input: &str) -> IResult<&str, FieldType> {
237    alt((
238        value(FieldType::Int32, tag("int32")),
239        value(FieldType::Int64, tag("int64")),
240        value(FieldType::Uint32, tag("uint32")),
241        value(FieldType::Uint64, tag("uint64")),
242        value(FieldType::Sint32, tag("sint32")),
243        value(FieldType::Sint64, tag("sint64")),
244        value(FieldType::Fixed32, tag("fixed32")),
245        value(FieldType::Sfixed32, tag("sfixed32")),
246        value(FieldType::Fixed64, tag("fixed64")),
247        value(FieldType::Sfixed64, tag("sfixed64")),
248        value(FieldType::Bool, tag("bool")),
249        value(FieldType::StringCow, tag("string")),
250        value(FieldType::BytesCow, tag("bytes")),
251        value(FieldType::Float, tag("float")),
252        value(FieldType::Double, tag("double")),
253        map(map_field, |(k, v)| FieldType::Map(Box::new(k), Box::new(v))),
254        map(qualifiable_name, FieldType::MessageOrEnum),
255    ))
256    .parse(input)
257}
258
259fn map_field(input: &str) -> IResult<&str, (FieldType, FieldType)> {
260    delimited(
261        (tag("map"), many0(br), tag("<"), many0(br)),
262        separated_pair(
263            field_type,
264            delimited(many0(br), tag(","), many0(br)),
265            field_type,
266        ),
267        pair(many0(br), tag(">")),
268    )
269    .parse(input)
270}
271
272fn default_check<'a>(
273    syntax: Syntax,
274    typ: FieldType,
275    key_vals: &[(&'a str, &'a str)],
276) -> Result<Option<String>, &'a str> {
277    for &(k, v) in key_vals.iter() {
278        if k == "default" {
279            return match (syntax, typ) {
280                (Syntax::Proto2, FieldType::StringCow | FieldType::BytesCow) => {
281                    let remove_compulsory_inverted_commas: IResult<&str, &str> = alt((
282                        delimited(tag("\""), take_until("\""), tag("\"")),
283                        delimited(tag("\'"), take_until("\'"), tag("\'")),
284                    ))
285                    .parse(v);
286                    remove_compulsory_inverted_commas
287                        .map(|(_, s)| Some(s.to_owned()))
288                        .map_err(|_| "Default value must be wrapped in inverted commas!")
289                }
290                (Syntax::Proto2, _) => Ok(Some(v.to_owned())),
291                (Syntax::Proto3, _) => Ok(Some(v.to_owned())),
292            };
293        }
294    }
295    Ok(None)
296}
297
298impl TryFrom<(Syntax, FieldType, Option<ParsingStageFrequencyToken>)> for Frequency {
299    type Error = &'static str;
300
301    fn try_from(
302        value: (Syntax, FieldType, Option<ParsingStageFrequencyToken>)
303    ) -> Result<Self, Self::Error> {
304        match value {
305            (Syntax::Proto2, FieldType::Map(..), _) => {
306                Ok(Frequency::Proto2Frequency(Proto2Frequency::Map))
307            }
308            (Syntax::Proto2, _, Some(ParsingStageFrequencyToken::Required)) => {
309                Ok(Frequency::Proto2Frequency(Proto2Frequency::Required))
310            }
311            (Syntax::Proto2, _, Some(ParsingStageFrequencyToken::Optional)) => {
312                Ok(Frequency::Proto2Frequency(Proto2Frequency::Optional))
313            }
314            (Syntax::Proto2, _, Some(ParsingStageFrequencyToken::Repeated)) => {
315                Ok(Frequency::Proto2Frequency(Proto2Frequency::Repeated))
316            }
317            (Syntax::Proto2, _, None) => Ok(Frequency::Proto2Frequency(Proto2Frequency::Optional)),
318            (Syntax::Proto3, FieldType::Map(..), _) => {
319                Ok(Frequency::Proto3Frequency(Proto3Frequency::Map))
320            }
321            (Syntax::Proto3, _, Some(ParsingStageFrequencyToken::Required)) => {
322                Ok(Frequency::Proto3Frequency(Proto3Frequency::Optional))
323            }
324            (Syntax::Proto3, _, Some(ParsingStageFrequencyToken::Optional)) => {
325                Ok(Frequency::Proto3Frequency(Proto3Frequency::Optional))
326            }
327            (Syntax::Proto3, _, Some(ParsingStageFrequencyToken::Repeated)) => {
328                Ok(Frequency::Proto3Frequency(Proto3Frequency::Repeated))
329            }
330            (Syntax::Proto3, _, None) => Ok(Frequency::Proto3Frequency(Proto3Frequency::Default)),
331        }
332    }
333}
334
335fn field_generic<F>(
336    syntax: Syntax,
337    freq_func: F,
338) -> impl FnMut(&str) -> IResult<&str, Field>
339where
340    F: Fn(
341        (Syntax, FieldType, Option<ParsingStageFrequencyToken>),
342    ) -> Result<Frequency, &'static str>,
343{
344    move |input| -> IResult<&str, Field> {
345        map_res(
346            (
347                opt(terminated(frequency, many1(br))),
348                terminated(field_type, many1(br)),
349                separated_pair(
350                    word,
351                    delimited(many0(br), tag("="), many0(br)),
352                    alt((integer, hex_integer)),
353                ),
354                delimited(many0(br), many0(key_val), pair(many0(br), tag(";"))),
355            ),
356            |(freq, typ, (name, number), key_vals)| {
357                Ok::<Field, &str>(Field {
358                    name,
359                    frequency: freq_func((syntax, typ.clone(), freq))?,
360                    number,
361                    default: default_check(syntax, typ.clone(), &key_vals)?,
362                    packed: key_vals.iter().find_map(|&(k, v)| {
363                        if k == "packed" {
364                            Some(v.parse().expect("Cannot parse Packed value"))
365                        } else {
366                            None
367                        }
368                    }),
369                    boxed: false,
370                    typ,
371                    deprecated: key_vals
372                        .iter()
373                        .find_map(|&(k, v)| {
374                            if k == "deprecated" {
375                                Some(v.parse().expect("Cannot parse Deprecated value"))
376                            } else {
377                                None
378                            }
379                        })
380                        .unwrap_or(false),
381                })
382            },
383        )
384        .parse(input)
385    }
386}
387
388fn message_field(syntax: Syntax) -> impl FnMut(&str) -> IResult<&str, Field> {
389    field_generic(syntax, TryInto::try_into)
390}
391
392fn oneof_message_field(syntax: Syntax) -> impl FnMut(&str) -> IResult<&str, Field> {
393    field_generic(syntax, |(syntax, _, _)| {
394        Ok(match syntax {
395            Syntax::Proto2 => Frequency::Proto2Frequency(Proto2Frequency::Required),
396            Syntax::Proto3 => Frequency::Proto3Frequency(Proto3Frequency::Optional),
397        })
398    })
399}
400
401fn one_of(syntax: Syntax) -> impl FnMut(&str) -> IResult<&str, OneOf> {
402    move |input| {
403        map(
404            pair(
405                preceded(pair(tag("oneof"), many1(br)), word),
406                delimited(
407                    pair(many0(br), tag("{")),
408                    many1(delimited(many0(br), oneof_message_field(syntax), many0(br))),
409                    tag("}"),
410                ),
411            ),
412            |(name, fields)| OneOf {
413                name,
414                fields,
415                package: "".to_string(),
416                module: "".to_string(),
417                imported: false,
418            },
419        )
420        .parse(input)
421    }
422}
423
424fn rpc_function_declaration(input: &str) -> IResult<&str, RpcFunctionDeclaration> {
425    map(
426        (
427            delimited(pair(tag("rpc"), many1(br)), word, many0(br)),
428            delimited(pair(tag("("), many0(br)), word, pair(many0(br), tag(")"))),
429            delimited(
430                (many1(br), tag("returns"), many0(br), tag("("), many0(br)),
431                word,
432                pair(many0(br), tag(")")),
433            ),
434            preceded(
435                many0(br),
436                alt((
437                    value(
438                        (),
439                        delimited(
440                            pair(tag("{"), many0(br)),
441                            many0(alt((option_ignore, value((), tag(";"))))),
442                            pair(many0(br), tag("}")),
443                        ),
444                    ),
445                    value((), tag(";")),
446                )),
447            ),
448        ),
449        |(name, arg, ret, _)| RpcFunctionDeclaration { name, arg, ret },
450    )
451    .parse(input)
452}
453
454fn rpc_service(input: &str) -> IResult<&str, RpcService> {
455    map(
456        pair(
457            delimited(pair(tag("service"), many1(br)), word, many0(br)),
458            delimited(
459                tag("{"),
460                many0(delimited(many0(br), rpc_function_declaration, many0(br))),
461                tag("}"),
462            ),
463        ),
464        |(service_name, functions)| RpcService {
465            service_name,
466            functions,
467        },
468    )
469    .parse(input)
470}
471
472fn message_event(syntax: Syntax) -> impl FnMut(&str) -> IResult<&str, MessageEvent> {
473    move |input| {
474        alt((
475            map(reserved_nums, MessageEvent::ReservedNums),
476            map(reserved_names, MessageEvent::ReservedNames),
477            map(message_field(syntax), MessageEvent::Field),
478            map(message(syntax), MessageEvent::Message),
479            map(enumerator, MessageEvent::Enumerator),
480            map(one_of(syntax), MessageEvent::OneOf),
481            map(extensions, MessageEvent::Extensions),
482            value(MessageEvent::Ignore, option_ignore),
483            value(MessageEvent::Ignore, br),
484        ))
485        .parse(input)
486    }
487}
488
489fn message(syntax: Syntax) -> impl FnMut(&str) -> IResult<&str, Message> {
490    move |input| {
491        map(
492            terminated(
493                pair(
494                    delimited(pair(tag("message"), many1(br)), word, many0(br)),
495                    delimited(tag("{"), many0(message_event(syntax)), tag("}")),
496                ),
497                opt(pair(many0(br), tag(";"))),
498            ),
499            |(name, events)| {
500                let mut msg = Message {
501                    name,
502                    ..Default::default()
503                };
504                for e in events {
505                    match e {
506                        MessageEvent::Field(f) => msg.fields.push(f),
507                        MessageEvent::ReservedNums(r) => {
508                            msg.reserved_nums = Some(r);
509                        }
510                        MessageEvent::ReservedNames(r) => {
511                            msg.reserved_names = Some(r);
512                        }
513                        MessageEvent::Message(m) => msg.messages.push(m),
514                        MessageEvent::Enumerator(e) => msg.enums.push(e),
515                        MessageEvent::OneOf(o) => msg.oneofs.push(o),
516                        MessageEvent::Extensions(e) => {
517                            msg.extensions = Some(e);
518                        }
519                        MessageEvent::Ignore => (),
520                    }
521                }
522                msg
523            },
524        )
525        .parse(input)
526    }
527}
528
529fn enum_field(input: &str) -> IResult<&str, (String, i32)> {
530    terminated(
531        separated_pair(
532            word,
533            (many0(br), tag("="), many0(br)),
534            alt((hex_integer, integer)),
535        ),
536        pair(
537            many0(alt((
538                br,
539                // TODO: add proper deprecation later. We ignore deprecated enum
540                // fields for now
541                value(
542                    (),
543                    (
544                        tag("["),
545                        many0(multispace1),
546                        tag("deprecated"),
547                        many0(multispace1),
548                        tag("="),
549                        many0(multispace1),
550                        word,
551                        many0(multispace1),
552                        tag("]"),
553                    ),
554                ),
555            ))),
556            tag(";"),
557        ),
558    )
559    .parse(input)
560}
561
562fn enum_event(input: &str) -> IResult<&str, EnumEvent> {
563    alt((
564        map(enum_field, EnumEvent::Field),
565        value(EnumEvent::Ignore, option_ignore),
566        value(EnumEvent::Ignore, br),
567    ))
568    .parse(input)
569}
570
571fn enumerator(input: &str) -> IResult<&str, Enumerator> {
572    map_res(
573        terminated(
574            pair(
575                delimited(pair(tag("enum"), many1(br)), word, many0(br)),
576                delimited(tag("{"), many0(enum_event), tag("}")),
577            ),
578            opt(pair(many0(br), tag(";"))),
579        ),
580        |(name, events)| {
581            let mut enumerator = Enumerator {
582                name,
583                ..Default::default()
584            };
585            for event in events {
586                if let EnumEvent::Field(f) = event {
587                    enumerator.fields.push(f);
588                }
589            }
590            Ok::<Enumerator, &str>(enumerator)
591        },
592    )
593    .parse(input)
594}
595
596fn option_ignore(input: &str) -> IResult<&str, ()> {
597    value(
598        (),
599        delimited(pair(tag("option"), many1(br)), take_until(";"), tag(";")),
600    )
601    .parse(input)
602}
603
604fn extend(syntax: Syntax) -> impl FnMut(&str) -> IResult<&str, Extend> {
605    move |input| {
606        map(
607            terminated(
608                pair(
609                    delimited(pair(tag("extend"), many1(br)), qualifiable_name, many0(br)),
610                    delimited(
611                        tag("{"),
612                        many1(delimited(many0(br), message_field(syntax), many0(br))),
613                        tag("}"),
614                    ),
615                ),
616                opt(pair(many0(br), tag(";"))),
617            ),
618            |(name, fields)| Extend { name, fields },
619        )
620        .parse(input)
621    }
622}
623
624fn scan_syntax(input: &str) -> IResult<&str, Syntax> {
625    map_res(separated_list0(many0(anychar), syntax), |v| {
626        Ok::<Syntax, &str>(if v.is_empty() { Syntax::Proto2 } else { v[0] })
627    })
628    .parse(input)
629}
630
631pub fn file_descriptor<'a>(
632    input: &'a str
633) -> IResult<&'a str, FileDescriptor, nom::error::Error<String>> {
634    let got_syntax = scan_syntax(input).unwrap().1;
635
636    let parser =
637        move |input: &'a str| -> IResult<&'a str, FileDescriptor, nom::error::Error<&str>> {
638            map(
639                many0(alt((
640                    map(syntax, Event::Syntax),
641                    map(import, Event::Import),
642                    map(package, Event::Package),
643                    map(message(got_syntax), Event::Message),
644                    map(enumerator, Event::Enum),
645                    map(rpc_service, Event::RpcService),
646                    map(extend(got_syntax), Event::Extend),
647                    value(Event::Ignore, option_ignore),
648                    value(Event::Ignore, br),
649                ))),
650                |events| {
651                    let mut desc = FileDescriptor::default();
652                    for event in events {
653                        match event {
654                            Event::Syntax(s) => {
655                                desc.syntax = s;
656                            }
657                            Event::Import(i) => desc.import_paths.push(i),
658                            Event::Package(p) => {
659                                desc.package = p;
660                            }
661                            Event::Message(m) => desc.messages.push(m),
662                            Event::Enum(e) => desc.enums.push(e),
663                            Event::RpcService(r) => desc.rpc_services.push(r),
664                            Event::Extend(e) => desc.message_extends.push(e),
665                            Event::Ignore => (),
666                        }
667                    }
668                    desc
669                },
670            )
671            .parse(input)
672        };
673
674    parser(input).map_err(|e: nom::Err<nom::error::Error<&str>>| e.to_owned())
675}