midl_parser/
parser.rs

1use std::collections::HashMap;
2use std::str;
3
4use crate::fmt;
5use crate::lexer::{
6    int, LexerError, NumLit, ParserLanguage, StrLitDecodeError, Token, Tokenizer, TokenizerError,
7};
8use crate::model::{FileDescriptor, Import, ImportVis};
9//use crate::ProtobufIdent;
10use frodobuf_schema::model::{
11    Attribute, Constant, EnumValue, Enumeration, Field, FieldType, Ident, Message, Method, Schema,
12    Service, ATTRIBUTE_ID_OPTION, ATTRIBUTE_UNNAMED,
13};
14use sha2::Digest;
15
/// Output type of a SHA-256 digest as defined by the `sha2` crate.
// NOTE(review): presumably holds the hash of a parsed schema — confirm against its users.
type SchemaHash = sha2::digest::Output<sha2::Sha256>;
17
// Punctuation characters the parser compares against symbol tokens.

// Braces delimiting enum, message and service bodies.
const SYM_LCURLY: char = '{';
const SYM_RCURLY: char = '}';
// Statement terminator; a lone ';' is also accepted as an empty statement.
const SYM_SEMICOLON: char = ';';
// Parentheses around attribute values and rpc argument / return types.
const SYM_LPAREN: char = '(';
const SYM_RPAREN: char = ')';
// Separates a name from its value (options, attributes, field and enum numbers).
const SYM_EQUALS: char = '=';
// Path separator inside a full identifier.
const SYM_PERIOD: char = '.';
// Separator inside attribute value lists and `map<K, V>`.
const SYM_COMMA: char = ',';
// Angle brackets around the key/value types of `map<K, V>`.
const SYM_LT: char = '<';
const SYM_GT: char = '>';
28
/// Basic information about parsing error.
///
/// The user-facing text for each variant is produced by the `Display`
/// implementation; the notes below restate those messages.
#[derive(Debug)]
pub enum ParserError {
    /// Error reported by the tokenizer.
    TokenizerError(TokenizerError),
    /// The input does not match what the parser expected ("incorrect input").
    IncorrectInput,
    /// A value is not valid UTF-8 ("not UTF-8").
    NotUtf8,
    /// A constant (literal or identifier) was expected.
    ExpectConstant,
    /// "unknown syntax"
    UnknownSyntax,
    /// A numeric value does not fit the target integer type.
    IntegerOverflow,
    /// "label not allowed"
    LabelNotAllowed,
    /// "label required"
    LabelRequired,
    /// "group name should start with upper case"
    GroupNameShouldStartWithUpperCase,
    /// A string literal could not be decoded.
    StrLitDecodeError(StrLitDecodeError),
    /// Error reported by the lexer.
    LexerError(LexerError),
    /// Map key type is neither an integer type nor `string`.
    MapKeyType,
    /// A field used both the `repeated` label and an array type.
    RepeatedArray,
    /// `@` attributes were not followed by an item they can apply to.
    DanglingAttributes,
    /// The given field number is used more than once.
    DuplicateFieldNumber(u32),
    /// "missing required 'package' statement"
    MissingPackage,
    /// Serialization failure; carries the underlying error's message.
    Serialization(String),
    /// "internal hash error", with a detail message.
    InternalHash(String),
    /// More than one `package` declaration in a file.
    OnlyOnePackage,
    /// The `syntax` statement value is not "proto2" or "proto3".
    SyntaxValue,
}
53
54impl fmt::Display for ParserError {
55    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56        match self {
57            ParserError::TokenizerError(e) => write!(f, "{}", e),
58            ParserError::IncorrectInput => write!(f, "incorrect input"),
59            ParserError::NotUtf8 => write!(f, "not UTF-8"),
60            ParserError::ExpectConstant => write!(f, "expecting a constant"),
61            ParserError::UnknownSyntax => write!(f, "unknown syntax"),
62            ParserError::IntegerOverflow => write!(f, "integer overflow"),
63            ParserError::LabelNotAllowed => write!(f, "label not allowed"),
64            ParserError::LabelRequired => write!(f, "label required"),
65            ParserError::RepeatedArray => write!(f, "use 'repeated' or array[], but not both"),
66            ParserError::MapKeyType => write!(
67                f,
68                "unsupported map key type: must be an integer type or string"
69            ),
70            ParserError::GroupNameShouldStartWithUpperCase => {
71                write!(f, "group name should start with upper case")
72            }
73            ParserError::StrLitDecodeError(e) => write!(f, "string literal decode error: {}", e),
74            ParserError::LexerError(e) => write!(f, "lexer error: {}", e),
75            ParserError::DanglingAttributes => write!(
76                f,
77                "'@' attributes defined without applicable type or service"
78            ),
79            ParserError::DuplicateFieldNumber(n) => write!(f, "duplicate field number ({})", n),
80            ParserError::MissingPackage => write!(f, "missing required 'package' statement"),
81            ParserError::InternalHash(s) => write!(f, "internal hash error {}", s),
82            ParserError::Serialization(s) => write!(f, "serialization error: {}", s),
83            ParserError::OnlyOnePackage => {
84                write!(f, "Only one package declaration is allowed per midl file")
85            }
86            ParserError::SyntaxValue => {
87                write!(f, "Expecting \"proto2\" or \"proto3\" in syntax statement")
88            }
89        }
90    }
91}
92
93impl From<TokenizerError> for ParserError {
94    fn from(e: TokenizerError) -> Self {
95        ParserError::TokenizerError(e)
96    }
97}
98
99impl From<serde_json::Error> for ParserError {
100    fn from(e: serde_json::Error) -> Self {
101        ParserError::Serialization(e.to_string())
102    }
103}
104
105impl From<StrLitDecodeError> for ParserError {
106    fn from(e: StrLitDecodeError) -> Self {
107        ParserError::StrLitDecodeError(e)
108    }
109}
110
111impl From<LexerError> for ParserError {
112    fn from(e: LexerError) -> Self {
113        ParserError::LexerError(e)
114    }
115}
116
117impl From<int::Overflow> for ParserError {
118    fn from(_: int::Overflow) -> Self {
119        ParserError::IntegerOverflow
120    }
121}
122
/// A [`ParserError`] together with the source position at which it was reported.
#[derive(Debug)]
pub struct ParserErrorWithLocation {
    /// The underlying parse error.
    pub error: ParserError,
    /// 1-based
    pub line: u32,
    /// 1-based
    pub col: u32,
}
131
132impl fmt::Display for ParserErrorWithLocation {
133    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
134        write!(f, "at {}:{}: {}", self.line, self.col, self.error)
135    }
136}
137
// Marker impl: relies on the `Debug` and `Display` impls above and the trait's default methods.
impl std::error::Error for ParserErrorWithLocation {}
139
/// Result type used throughout the parser; the error is always a [`ParserError`].
pub type ParserResult<T> = Result<T, ParserError>;
141
142fn attrib_for_loc(loc: &crate::lexer::Loc) -> Attribute {
143    Attribute {
144        key: Ident::from_namespace(None, "_source".to_string()),
145        values: vec![
146            ("line".to_string(), Constant::U64(loc.line as u64)),
147            ("col".to_string(), Constant::U64(loc.col as u64)),
148        ],
149    }
150}
151
/// Fallible conversion to `u8`.
trait ToU8 {
    /// Returns the value as `u8`, or a [`ParserError`] if it cannot be represented.
    fn to_u8(&self) -> ParserResult<u8>;
}
155
/// Fallible conversion to `i32`.
trait ToI32 {
    /// Returns the value as `i32`, or a [`ParserError`] if it cannot be represented.
    fn to_i32(&self) -> ParserResult<i32>;
}
159
/// Fallible conversion to `i64`.
trait ToI64 {
    /// Returns the value as `i64`, or a [`ParserError`] if it cannot be represented.
    fn to_i64(&self) -> ParserResult<i64>;
}
163
/// Fallible conversion to `char`.
trait ToChar {
    /// Returns the value as `char`, or a [`ParserError`] if it cannot be represented.
    fn to_char(&self) -> ParserResult<char>;
}
167
168impl ToI32 for u64 {
169    fn to_i32(&self) -> ParserResult<i32> {
170        if *self <= i32::MAX as u64 {
171            Ok(*self as i32)
172        } else {
173            Err(ParserError::IntegerOverflow)
174        }
175    }
176}
177
178impl ToI32 for i64 {
179    fn to_i32(&self) -> ParserResult<i32> {
180        if *self <= i32::MAX as i64 && *self >= i32::MIN as i64 {
181            Ok(*self as i32)
182        } else {
183            Err(ParserError::IntegerOverflow)
184        }
185    }
186}
187
188impl ToI64 for u64 {
189    fn to_i64(&self) -> Result<i64, ParserError> {
190        if *self <= i64::MAX as u64 {
191            Ok(*self as i64)
192        } else {
193            Err(ParserError::IntegerOverflow)
194        }
195    }
196}
197
198impl ToChar for u8 {
199    fn to_char(&self) -> Result<char, ParserError> {
200        if *self <= 0x7f {
201            Ok(*self as char)
202        } else {
203            Err(ParserError::NotUtf8)
204        }
205    }
206}
207
208impl ToU8 for u32 {
209    fn to_u8(&self) -> Result<u8, ParserError> {
210        if *self as u8 as u32 == *self {
211            Ok(*self as u8)
212        } else {
213            Err(ParserError::IntegerOverflow)
214        }
215    }
216}
217
/// Value of a `syntax = "...";` statement.
///
/// Derives the usual value-type traits so callers can print, copy and
/// compare the parsed syntax version.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProtobufSyntax {
    /// `syntax = "proto2"`
    Proto2,
    /// `syntax = "proto3"`
    Proto3,
}
222
223/// Parse file into schema.
224/// Does not import any of the 'imports' or resolve foreign references
225pub fn parse_string(text: &str) -> Result<FileDescriptor, ParserErrorWithLocation> {
226    let mut parser = Parser::new(&text);
227    match parser.next_proto() {
228        Ok(r) => Ok(r),
229        Err(error) => {
230            let crate::lexer::Loc { line, col } = parser.tokenizer.loc();
231            Err(ParserErrorWithLocation { error, line, col })
232        }
233    }
234}
235
/// Recursive-descent parser state: a thin wrapper around the tokenizer.
#[derive(Clone)]
pub(crate) struct Parser<'a> {
    /// Token source; also queried for the current location when reporting errors.
    pub tokenizer: Tokenizer<'a>,
}
240
/// Extension trait for turning a numeric literal into a [`Constant`].
trait NumLitEx {
    /// Converts the literal to a constant, negating it when `sign_is_plus` is false.
    fn to_option_value(&self, sign_is_plus: bool) -> ParserResult<Constant>;
}
244
245impl NumLitEx for NumLit {
246    fn to_option_value(&self, sign_is_plus: bool) -> ParserResult<Constant> {
247        Ok(match (*self, sign_is_plus) {
248            (NumLit::U64(u), true) => Constant::U64(u),
249            (NumLit::F64(f), true) => Constant::F64(f),
250            (NumLit::U64(u), false) => Constant::I64(int::neg(u)?),
251            (NumLit::F64(f), false) => Constant::F64(-f),
252        })
253    }
254}
255
256impl<'a> Parser<'a> {
257    pub fn new(input: &'a str) -> Parser<'a> {
258        Parser {
259            tokenizer: Tokenizer::new(input, ParserLanguage::Proto),
260        }
261    }
262
263    // Protobuf grammar
264
265    // fullIdent = ident { "." ident }*
266    // Also accepts '::' as separator
267    fn next_full_ident(&mut self) -> ParserResult<Ident> {
268        let mut full_ident = self.tokenizer.next_ident()?;
269        let has_path_separator = |t: &Token| {
270            if matches!(t, Token::DoubleColon | Token::Symbol(SYM_PERIOD)) {
271                Some(Token::Symbol(SYM_PERIOD))
272            } else {
273                None
274            }
275        };
276        loop {
277            if self
278                .tokenizer
279                .next_token_if_map(has_path_separator)?
280                .is_some()
281            {
282                full_ident.push_str(frodobuf_schema::model::IDENT_PATH_DELIMITER);
283            } else {
284                break;
285            }
286            full_ident.push_str(&self.tokenizer.next_ident()?);
287        }
288        Ok(full_ident.into())
289    }
290
291    // emptyStatement = ";"
292    fn next_empty_statement_opt(&mut self) -> ParserResult<Option<()>> {
293        if self.tokenizer.next_symbol_if_eq(SYM_SEMICOLON)? {
294            Ok(Some(()))
295        } else {
296            Ok(None)
297        }
298    }
299
300    // Boolean
301
302    // boolLit = "true" | "false"
303    fn next_bool_lit_opt(&mut self) -> ParserResult<Option<bool>> {
304        Ok(if self.tokenizer.next_ident_if_eq("true")? {
305            Some(true)
306        } else if self.tokenizer.next_ident_if_eq("false")? {
307            Some(false)
308        } else {
309            None
310        })
311    }
312
313    // Constant
314
315    fn next_num_lit(&mut self) -> ParserResult<NumLit> {
316        self.tokenizer
317            .next_token_check_map(|token| Ok(token.to_num_lit()?))
318    }
319
320    // lit =  ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) |
321    //            strLit | boolLit
322    fn next_lit_opt(&mut self) -> ParserResult<Option<Constant>> {
323        if let Some(b) = self.next_bool_lit_opt()? {
324            return Ok(Some(Constant::Bool(b)));
325        }
326
327        if let Token::Symbol(c) = *(self.tokenizer.lookahead_some()?) {
328            if c == '+' || c == '-' {
329                self.tokenizer.advance()?;
330                let sign = c == '+';
331                return self.next_num_lit()?.to_option_value(sign).map(Some);
332            }
333        }
334
335        if let Some(r) = self.tokenizer.next_token_if_map(|token| match *token {
336            Token::StrLit(ref s) => Some(Constant::String(s.to_string())),
337            _ => None,
338        })? {
339            return Ok(Some(r));
340        }
341
342        if matches!(
343            self.tokenizer.lookahead_some()?,
344            &Token::IntLit(..) | &Token::FloatLit(..)
345        ) {
346            return self.next_num_lit()?.to_option_value(true).map(Some);
347        }
348        Ok(None)
349    }
350
351    // constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) |
352    //            strLit | boolLit
353    fn next_constant(&mut self) -> ParserResult<Constant> {
354        if let Some(lit) = self.next_lit_opt()? {
355            return Ok(lit);
356        }
357        // We could just call next_full_ident here, but if it's not an identifier,
358        // ExpectConstant is a better error to return than ExpectIdent
359        if matches!(self.tokenizer.lookahead_some()?, &Token::Ident(..)) {
360            return Ok(Constant::Ident(self.next_full_ident()?));
361        }
362        Err(ParserError::ExpectConstant)
363    }
364
365    fn next_int_lit(&mut self) -> ParserResult<u64> {
366        self.tokenizer.next_token_check_map(|token| match *token {
367            Token::IntLit(i) => Ok(i),
368            _ => Err(ParserError::IncorrectInput),
369        })
370    }
371
372    // Import Statement
373
374    // import = "import" [ "weak" | "public" ] strLit ";"
375    fn next_import_opt(&mut self) -> ParserResult<Option<Import>> {
376        if self.tokenizer.next_ident_if_eq("import")? {
377            let vis = if self.tokenizer.next_ident_if_eq("weak")? {
378                ImportVis::Weak
379            } else if self.tokenizer.next_ident_if_eq("public")? {
380                ImportVis::Public
381            } else {
382                ImportVis::Default
383            };
384            let path = self.tokenizer.next_str_lit()?.decode_utf8()?;
385            self.tokenizer.next_symbol_expect_eq(SYM_SEMICOLON)?;
386            Ok(Some(Import { path, vis }))
387        } else {
388            Ok(None)
389        }
390    }
391
392    // Package
393
394    // package = "package" fullIdent ";"
395    fn next_package_opt(&mut self) -> ParserResult<Option<Ident>> {
396        if self.tokenizer.next_ident_if_eq("package")? {
397            let package = self.next_full_ident()?;
398            self.tokenizer.next_symbol_expect_eq(SYM_SEMICOLON)?;
399            Ok(Some(package))
400        } else {
401            Ok(None)
402        }
403    }
404
    // syntax = "syntax" "=" ( "proto2" | "proto3" )
    // The value is validated, then silently ignored by the caller;
    // the trailing ";" is not consumed here.
407    fn next_syntax_opt(&mut self) -> ParserResult<Option<ProtobufSyntax>> {
408        if self.tokenizer.next_ident_if_eq("syntax")? {
409            self.tokenizer.next_symbol_expect_eq(SYM_EQUALS)?;
410            let syntax = match self
411                .tokenizer
412                .next_token_if_map(|token| match *token {
413                    Token::StrLit(ref s) => Some(Constant::String(s.to_string())),
414                    _ => None,
415                })?
416                .ok_or(ParserError::SyntaxValue)?
417                .as_string()
418                .ok_or(ParserError::SyntaxValue)?
419            {
420                "proto2" => ProtobufSyntax::Proto2,
421                "proto3" => ProtobufSyntax::Proto3,
422                _ => return Err(ParserError::SyntaxValue),
423            };
424            Ok(Some(syntax))
425        } else {
426            Ok(None)
427        }
428    }
429
430    // @attrib
431    // @attrib( name [=value], ... )
432    // @attrib(=value ) (anonymous value)
433
434    // trailing comma is ok
435    fn next_attribute_opt(&mut self) -> ParserResult<Option<Attribute>> {
436        if self.tokenizer.next_symbol_if_eq('@')? {
437            let key = self.next_full_ident()?;
438            let mut values = Vec::new();
439            if self.tokenizer.next_symbol_if_eq(SYM_LPAREN)? {
440                loop {
441                    if self.tokenizer.next_symbol_if_eq(SYM_RPAREN)? {
442                        break;
443                    }
444                    // lit (anon const) or  'name=value' or 'name'
445                    if let Some(lit) = self.next_lit_opt()? {
446                        // anon const , e.g., @doc("hear ye")
447                        values.push((ATTRIBUTE_UNNAMED.to_string(), lit));
448                        // optional comma
449                        let _ = self.tokenizer.next_symbol_if_eq(SYM_COMMA)?;
450                        continue;
451                    }
452                    // 'name=value' or 'name'
453                    let opt_name = self.tokenizer.next_ident()?;
454                    if self.tokenizer.next_symbol_if_eq(SYM_COMMA)? {
455                        let opt_value = Constant::Bool(true);
456                        values.push((opt_name, opt_value));
457                        continue;
458                    }
459                    if self.tokenizer.next_symbol_if_eq(SYM_RPAREN)? {
460                        let opt_value = Constant::Bool(true);
461                        values.push((opt_name, opt_value));
462                        break;
463                    }
464                    self.tokenizer.next_symbol_expect_eq(SYM_EQUALS)?;
465                    let value = self.next_constant()?;
466                    values.push((opt_name, value));
467                }
468            }
469            // optionally followed by ';'
470            let _ = self.tokenizer.next_symbol_if_eq(SYM_SEMICOLON);
471            Ok(Some(Attribute { key, values }))
472        } else {
473            Ok(None)
474        }
475    }
476
477    // option = "option" optionName  "=" constant ";"
478    // encode as attribute: "@(optionName = constant)"
479    fn next_option_opt(&mut self) -> ParserResult<Option<Attribute>> {
480        if self.tokenizer.next_ident_if_eq("option")? {
481            let name = self.next_full_ident()?;
482            self.tokenizer.next_symbol_expect_eq(SYM_EQUALS)?;
483            let value = self.next_constant()?;
484            self.tokenizer.next_symbol_expect_eq(SYM_SEMICOLON)?;
485            Ok(Some(Attribute {
486                key: Ident::from_namespace(None, ATTRIBUTE_ID_OPTION.into()),
487                values: vec![(name.to_string(), value)],
488            }))
489        } else {
490            Ok(None)
491        }
492    }
493
494    // Fields
495
496    // label = "required" | "optional" | "repeated"
497    fn next_label(&mut self) -> ParserResult<Option<Occurrence>> {
498        let map = &[
499            ("optional", Occurrence::Optional),
500            ("required", Occurrence::Required),
501            ("repeated", Occurrence::Repeated),
502        ];
503        for (name, value) in map.iter() {
504            let mut clone = self.clone();
505            if clone.tokenizer.next_ident_if_eq(name)? {
506                *self = clone;
507                return Ok(Some(value.clone()));
508            }
509        }
510        Ok(None)
511    }
512
513    fn next_field_type(&mut self) -> ParserResult<FieldType> {
514        let simple = &[
515            ("int32", FieldType::Int32),
516            ("int64", FieldType::Int64),
517            ("uint32", FieldType::Uint32),
518            ("uint64", FieldType::Uint64),
519            ("int8", FieldType::Int8),
520            ("uint8", FieldType::Uint8),
521            ("bool", FieldType::Bool),
522            ("string", FieldType::String),
523            ("bytes", FieldType::Bytes),
524            ("float", FieldType::Float32), // alias for float32
525            ("float32", FieldType::Float32),
526            ("float64", FieldType::Float64),
527            ("double", FieldType::Float64), // alias for float64
528        ];
529
530        for &(ref n, ref t) in simple {
531            if self.tokenizer.next_ident_if_eq(n)? {
532                return Ok(t.clone());
533            }
534        }
535
536        if let Some(t) = self.next_map_field_type_opt()? {
537            return Ok(t);
538        }
539
540        if let Some(t) = self.next_array_field_type_opt()? {
541            return Ok(t);
542        }
543
544        Ok(FieldType::ObjectOrEnum(self.next_full_ident()?))
545    }
546
547    fn next_field_number(&mut self) -> ParserResult<u32> {
548        // TODO: not all integers are valid field numbers
549        self.tokenizer.next_token_check_map(|token| match *token {
550            Token::IntLit(i) => Ok(i as u32),
551            _ => Err(ParserError::IncorrectInput),
552        })
553    }
554
555    // field = label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
556    fn next_field(&mut self) -> ParserResult<Field> {
557        let loc = self.tokenizer.lookahead_loc();
558        let attributes = vec![attrib_for_loc(&loc)];
559        let occurrence = self.next_label()?;
560        let typ = {
561            match (&occurrence, self.next_field_type()?) {
562                (Some(Occurrence::Repeated), FieldType::Array(_)) => {
563                    return Err(ParserError::RepeatedArray);
564                }
565                // turn 'repeated' into array
566                (Some(Occurrence::Repeated), typ) => FieldType::Array(Box::new(typ)),
567                (_, typ) => typ,
568            }
569        };
570        let name = self.tokenizer.next_ident()?;
571        let optional = self.tokenizer.next_symbol_if_eq('?')?
572            || matches!(occurrence, Some(Occurrence::Optional));
573
574        // unlike protobuf, "= num"  is optional; default to zero
575        // if zero, will be replaced in message body as 1-based sequence number
576        let number = if self.tokenizer.next_symbol_if_eq(SYM_EQUALS)? {
577            self.next_field_number()?
578        } else {
579            0
580        };
581        // must terminate with ';'
582        self.tokenizer.next_symbol_expect_eq(SYM_SEMICOLON)?;
583        let field = Field {
584            name,
585            optional,
586            typ,
587            number,
588            attributes,
589        };
590        Ok(field)
591    }
592
593    // mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
594    // keyType = "int8" | "int32" | "int64" | "uint8" | "uint32" | "uint64" | "string"
595    fn next_map_field_type_opt(&mut self) -> ParserResult<Option<FieldType>> {
596        if self.tokenizer.next_ident_if_eq("map")? {
597            self.tokenizer.next_symbol_expect_eq(SYM_LT)?;
598            let key = self.next_field_type()?;
599            if !key.is_integer() && !matches!(&key, FieldType::String) {
600                return Err(ParserError::MapKeyType);
601            }
602            self.tokenizer.next_symbol_expect_eq(SYM_COMMA)?;
603            let value = self.next_field_type()?;
604            self.tokenizer.next_symbol_expect_eq(SYM_GT)?;
605            Ok(Some(FieldType::Map(Box::new((key, value)))))
606        } else {
607            Ok(None)
608        }
609    }
610
611    // arrayField = "[" keyType  "]" ident "=" fieldNumber [ "[" fieldOptions "]" ] ";"
612    // keyType = any
613    fn next_array_field_type_opt(&mut self) -> ParserResult<Option<FieldType>> {
614        if self.tokenizer.next_symbol_if_eq('[')? {
615            let item_type = self.next_field_type()?;
616            self.tokenizer.next_symbol_expect_eq(']')?;
617            Ok(Some(FieldType::Array(Box::new(item_type))))
618        } else {
619            Ok(None)
620        }
621    }
622
623    // Top Level definitions
624
625    // Enum definition
626
627    // https://github.com/google/protobuf/issues/4561
628    fn next_enum_value(&mut self) -> ParserResult<i32> {
629        let minus = self.tokenizer.next_symbol_if_eq('-')?;
630        let lit = self.next_int_lit()?;
631        Ok(if minus {
632            let unsigned = lit.to_i64()?;
633            match unsigned.checked_neg() {
634                Some(neg) => neg.to_i32()?,
635                None => return Err(ParserError::IntegerOverflow),
636            }
637        } else {
638            lit.to_i32()?
639        })
640    }
641
642    // enumField = ident "=" intLit [ "[" enumValueOption { ","  enumValueOption } "]" ]";"
643    fn next_enum_field(&mut self) -> ParserResult<EnumValue> {
644        let name = self.tokenizer.next_ident()?;
645        self.tokenizer.next_symbol_expect_eq(SYM_EQUALS)?;
646        let number = self.next_enum_value()?;
647        Ok(EnumValue {
648            name,
649            number,
650            attributes: vec![],
651        })
652    }
653
654    // enum = "enum" enumName enumBody
655    // enumBody = "{" { option | enumField | emptyStatement } "}"
656    fn next_enum_opt(&mut self) -> ParserResult<Option<Enumeration>> {
657        if self.tokenizer.next_ident_if_eq("enum")? {
658            let name = self.tokenizer.next_ident()?;
659
660            let mut values = Vec::new();
661            let mut attributes = Vec::new();
662
663            self.tokenizer.next_symbol_expect_eq(SYM_LCURLY)?;
664            while self.tokenizer.lookahead_if_symbol()? != Some(SYM_RCURLY) {
665                // emptyStatement
666                if self.tokenizer.next_symbol_if_eq(SYM_SEMICOLON)? {
667                    continue;
668                }
669
670                // collection 'option's, append to attributes of enum
671                if let Some(attr) = self.next_option_opt()? {
672                    attributes.push(attr);
673                    continue;
674                }
675
676                values.push(self.next_enum_field()?);
677            }
678            self.tokenizer.next_symbol_expect_eq(SYM_RCURLY)?;
679            Ok(Some(Enumeration {
680                name,
681                values,
682                attributes,
683            }))
684        } else {
685            Ok(None)
686        }
687    }
688
689    // Message definition
690    // messageBody = "{" { field | enum | message |
691    //               option | mapField | reserved | emptyStatement } "}"
692    fn next_message_body(&mut self) -> ParserResult<Message> {
693        let loc = self.tokenizer.lookahead_loc();
694        self.tokenizer.next_symbol_expect_eq(SYM_LCURLY)?;
695
696        let dup_check: HashMap<u32, bool> = HashMap::new();
697        let mut message = Message::default();
698        message.attributes.push(attrib_for_loc(&loc));
699        // buffer for attributes for members of this message
700        let mut item_attributes = Vec::new();
701
702        while self.tokenizer.lookahead_if_symbol()? != Some(SYM_RCURLY) {
703            //let loc = self.tokenizer.lookahead_loc();
704
705            // emptyStatement
706            if self.tokenizer.next_symbol_if_eq(SYM_SEMICOLON)? {
707                continue;
708            }
709
710            if let Some(mut nested_message) = self.next_message_opt()? {
711                nested_message.attributes.append(&mut item_attributes);
712                message.messages.push(nested_message);
713                continue;
714            }
715
716            if let Some(mut nested_enum) = self.next_enum_opt()? {
717                nested_enum.attributes.append(&mut item_attributes);
718                message.enums.push(nested_enum);
719                continue;
720            }
721
722            if let Some(option) = self.next_option_opt()? {
723                message.attributes.push(option);
724                continue;
725            }
726
727            if let Some(attr) = self.next_attribute_opt()? {
728                item_attributes.push(attr);
729                continue;
730            }
731            let mut field = self.next_field()?;
732            field.attributes.append(&mut item_attributes);
733            if field.number == 0 {
734                field.number = message.fields.len() as u32 + 1;
735            }
736            if dup_check.contains_key(&field.number) {
737                return Err(ParserError::DuplicateFieldNumber(field.number));
738            }
739            message.fields.push(field);
740        }
741
742        if !item_attributes.is_empty() {
743            return Err(ParserError::DanglingAttributes);
744        }
745        self.tokenizer.next_symbol_expect_eq(SYM_RCURLY)?;
746
747        Ok(message)
748    }
749
750    // message = "message" messageName messageBody
751    fn next_message_opt(&mut self) -> ParserResult<Option<Message>> {
752        //let loc = self.tokenizer.lookahead_loc();
753
754        if self.tokenizer.next_ident_if_eq("message")? {
755            let name = Ident::from_namespace(None, self.tokenizer.next_ident()?);
756            let mut message = self.next_message_body()?;
757            message.name = name;
758            Ok(Some(message))
759        } else {
760            Ok(None)
761        }
762    }
763
764    // Service definition
765
766    // rpc = "rpc" rpcName "(" messageType ")"
767    //     "returns" "(" messageType ")"
768    //     (( "{" { option | emptyStatement } "}" ) | ";" )
769    fn next_rpc_opt(&mut self) -> ParserResult<Option<Method>> {
770        let has_fn_returns = |t: &Token| {
771            if matches!(t, Token::FnReturns) {
772                Some(Token::FnReturns)
773            } else {
774                None
775            }
776        };
777
778        if self.tokenizer.next_ident_if_eq("rpc")? {
779            let name = self.tokenizer.next_ident()?;
780
781            self.tokenizer.next_symbol_expect_eq(SYM_LPAREN)?;
782
783            let input_type = if self.tokenizer.next_symbol_if_eq(SYM_RPAREN)? {
784                // empty args
785                None
786            } else {
787                // non-empty args
788                let arg = self.next_field_type()?;
789                self.tokenizer.next_symbol_expect_eq(SYM_RPAREN)?;
790                Some(arg)
791            };
792            // Return type
793            // if "->" or "returns", get the return type as () or a data type
794            // if omitted (method definition ends with ;), return type is also void (None)
795            let output_type = if self.tokenizer.next_token_if_map(has_fn_returns)?.is_some()
796                || self.tokenizer.next_ident_if_eq("returns")?
797            {
798                if self.tokenizer.next_symbol_if_eq(SYM_LPAREN)? {
799                    if self.tokenizer.next_symbol_if_eq(SYM_RPAREN)? {
800                        None
801                    } else {
802                        let ret_type = self.next_field_type()?;
803                        self.tokenizer.next_symbol_expect_eq(SYM_RPAREN)?;
804                        Some(ret_type)
805                    }
806                } else {
807                    Some(self.next_field_type()?)
808                }
809            } else {
810                None
811            };
812
813            // require semicolon to terminate method definition
814            self.tokenizer.next_symbol_expect_eq(SYM_SEMICOLON)?;
815
816            Ok(Some(Method {
817                name,
818                input_type,
819                output_type,
820                attributes: Vec::new(),
821            }))
822        } else {
823            Ok(None)
824        }
825    }
826
827    // proto2:
828    // service = "service" serviceName "{" { option | fn | stream | emptyStatement } "}"
829    //
830    // proto3:
831    // service = "service" serviceName "{" { option | fn | emptyStatement } "}"
832    fn next_service_opt(&mut self) -> ParserResult<Option<Service>> {
833        let loc = self.tokenizer.lookahead_loc();
834
835        if self.tokenizer.next_ident_if_eq("service")? {
836            let name = Ident {
837                namespace: None,
838                name: self.tokenizer.next_ident()?,
839            };
840            let mut methods = Vec::new();
841            let attributes = vec![attrib_for_loc(&loc)];
842
843            let mut item_attributes = Vec::new();
844            self.tokenizer.next_symbol_expect_eq(SYM_LCURLY)?;
845            while self.tokenizer.lookahead_if_symbol()? != Some(SYM_RCURLY) {
846                if let Some(mut method) = self.next_rpc_opt()? {
847                    method.attributes.append(&mut item_attributes);
848                    methods.push(method);
849                    continue;
850                }
851
852                if let Some(a) = self.next_attribute_opt()? {
853                    item_attributes.push(a);
854                    continue;
855                }
856
857                if let Some(()) = self.next_empty_statement_opt()? {
858                    continue;
859                }
860
861                return Err(ParserError::IncorrectInput);
862            }
863            if !item_attributes.is_empty() {
864                return Err(ParserError::DanglingAttributes);
865            }
866            self.tokenizer.next_symbol_expect_eq(SYM_RCURLY)?;
867            Ok(Some(Service {
868                name,
869                methods,
870                attributes,
871                ..Default::default()
872            }))
873        } else {
874            Ok(None)
875        }
876    }
877
878    // Proto file
879
880    // proto = syntax { import | package | option | topLevelDef | emptyStatement }
881    // topLevelDef = message | enum | service
882    pub fn next_proto(&mut self) -> ParserResult<FileDescriptor> {
883        let mut imports = Vec::new();
884        let mut package = None;
885        let mut messages = Vec::new();
886        let mut enums = Vec::new();
887        let mut file_attributes = Vec::new();
888        let mut services = Vec::new();
889
890        // buffer attributes until we know what they apply to (message, enum, or service)
891        let mut inner_attributes = Vec::new();
892
893        while !self.tokenizer.syntax_eof()? {
894            if let Some(import) = self.next_import_opt()? {
895                if !inner_attributes.is_empty() {
896                    return Err(ParserError::DanglingAttributes);
897                }
898                imports.push(import);
899                continue;
900            }
901
902            if let Some(_) = self.next_syntax_opt()? {
903                // TODO: print warning, if verbose
904                // ignore
905            }
906
907            if let Some(next_package) = self.next_package_opt()? {
908                if package.is_some() {
909                    return Err(ParserError::OnlyOnePackage);
910                }
911                if !inner_attributes.is_empty() {
912                    return Err(ParserError::DanglingAttributes);
913                }
914                package = Some(next_package);
915                continue;
916            }
917
918            if let Some(attrib) = self.next_attribute_opt()? {
919                inner_attributes.push(attrib);
920                continue;
921            }
922
923            if let Some(option) = self.next_option_opt()? {
924                // can't mix @attribute and option
925                // TODO: do we need this restriction?
926                if !inner_attributes.is_empty() {
927                    return Err(ParserError::DanglingAttributes);
928                }
929                file_attributes.push(option);
930                continue;
931            }
932
933            if let Some(mut message) = self.next_message_opt()? {
934                message.attributes.append(&mut inner_attributes);
935                messages.push(message);
936                continue;
937            }
938
939            if let Some(mut enumeration) = self.next_enum_opt()? {
940                enumeration.attributes.append(&mut inner_attributes);
941                enums.push(enumeration);
942                continue;
943            }
944
945            if let Some(mut service) = self.next_service_opt()? {
946                service.attributes.append(&mut inner_attributes);
947                services.push(service);
948                continue;
949            }
950
951            if self.tokenizer.next_symbol_if_eq(SYM_SEMICOLON)? {
952                if !inner_attributes.is_empty() {
953                    return Err(ParserError::DanglingAttributes);
954                }
955                continue;
956            }
957
958            return Err(ParserError::IncorrectInput);
959        }
960        if !inner_attributes.is_empty() {
961            return Err(ParserError::DanglingAttributes);
962        }
963        let namespace = match package {
964            Some(ns) => ns,
965            None => return Err(ParserError::MissingPackage),
966        };
967
968        let mut schema = Schema {
969            namespace,
970            messages,
971            enums,
972            attributes: file_attributes,
973            ..Default::default()
974        };
975        // compute hash of everything except services:
976        // - schema namespace, all custom data types, and attributes
977        let base_hash = sha2_hash(vec![&serde_json::to_vec(&schema)?]);
978        let b64_config = base64::Config::new(base64::CharacterSet::Standard, false);
979        // for each service, hash serialized service + base hash because services depend on types
980        // but services don't depend on each other, so they each have a separate signature
981        for mut service in services.iter_mut() {
982            let serialized = serde_json::to_vec(&service)?;
983            let hash = sha2_hash(vec![&serialized, &base_hash]);
984            service.schema_id = Some(base64::encode_config(&hash, b64_config));
985            service.schema = Some(base64::encode_config(&serialized, b64_config));
986        }
987        schema.services = services;
988
989        // add parser version
990        schema.attributes.push(Attribute {
991            key: Ident::from_namespace(None, "midl_parser_version".to_string()),
992            values: vec![("_".to_string(), Constant::from(crate::MIDL_PARSER_VERSION))],
993        });
994
995        // services,
996        Ok(FileDescriptor { imports, schema })
997    }
998}
999
1000/// Compute sha-256 hash of a byte vector. Result is a 32-byte value
1001fn sha2_hash(data: Vec<&[u8]>) -> SchemaHash {
1002    let mut hash = sha2::Sha256::new();
1003    for v in data.iter() {
1004        hash.update(v)
1005    }
1006    hash.finalize()
1007}
1008
/// Field occurrence label, used only while parsing.
///
/// In the schema these are translated as follows:
///  `Optional` -> Optional
///  `Required` -> !Optional
///  `Repeated` -> Array<value>
#[derive(Clone, Debug)]
enum Occurrence {
    /// Translated to an optional field.
    Optional,
    /// Translated to a non-optional field.
    Required,
    /// Translated to an array of the value type.
    Repeated,
}
1019
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `parse_what` on `input` and returns its result.
    ///
    /// Panics with the tokenizer location if parsing fails or if any input is
    /// left over afterwards. Messages are built lazily, only on failure.
    fn parse<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Parser) -> ParserResult<R>,
    {
        let mut parser = Parser::new(input);
        let r = parse_what(&mut parser)
            .unwrap_or_else(|e| panic!("parse failed at {}: {:?}", parser.tokenizer.loc(), e));
        let eof = parser
            .tokenizer
            .syntax_eof()
            .unwrap_or_else(|e| panic!("check eof failed at {}: {:?}", parser.tokenizer.loc(), e));
        assert!(eof, "{}", parser.tokenizer.loc());
        r
    }

    /// Like [`parse`], for parsers returning `Option`.
    ///
    /// Additionally panics if the parser returns `None`.
    fn parse_opt<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Parser) -> ParserResult<Option<R>>,
    {
        let mut parser = Parser::new(input);
        let o = parse_what(&mut parser)
            .unwrap_or_else(|e| panic!("parse failed at {}: {:?}", parser.tokenizer.loc(), e));
        let r =
            o.unwrap_or_else(|| panic!("parser returned none at {}", parser.tokenizer.loc()));
        // Report where the leftover input starts, as `parse` does.
        assert!(
            parser.tokenizer.syntax_eof().unwrap(),
            "{}",
            parser.tokenizer.loc()
        );
        r
    }

    /// A message with ten repeated fields yields ten parsed fields.
    #[test]
    fn test_message() {
        let msg = r#"
        message ReferenceData
    {
        repeated ScenarioInfo  scenarioSet = 1;
        repeated CalculatedObjectInfo calculatedObjectSet = 2;
        repeated RiskFactorList riskFactorListSet = 3;
        repeated RiskMaturityInfo riskMaturitySet = 4;
        repeated IndicatorInfo indicatorSet = 5;
        repeated RiskStrikeInfo riskStrikeSet = 6;
        repeated FreeProjectionList freeProjectionListSet = 7;
        repeated ValidationProperty ValidationSet = 8;
        repeated CalcProperties calcPropertiesSet = 9;
        repeated MaturityInfo maturitySet = 10;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(10, mess.fields.len());
    }

    /// An enum with four entries yields four parsed values.
    #[test]
    fn test_enum() {
        let msg = r#"
        enum PairingStatus {
                DEALPAIRED        = 0;
                INVENTORYORPHAN   = 1;
                CALCULATEDORPHAN  = 2;
                CANCELED          = 3;
    }"#;

        let enumeration = parse_opt(msg, |p| p.next_enum_opt());
        assert_eq!(4, enumeration.values.len());
    }

    /// An `option` statement is accepted by the option parser.
    #[test]
    fn test_ignore() {
        let msg = r#"
        option optimize_for = SPEED;"#;

        parse_opt(msg, |p| p.next_option_opt());
    }

    /// The import path is recorded in the file descriptor.
    #[test]
    fn test_import() {
        let msg = r#"package t;
    import "test_import_nested_imported_pb.proto";

    message ContainsImportedNested {
        ContainerForNested.NestedMessage m = 1;
        ContainerForNested.NestedEnum e = 2;
    }
    "#;
        let desc = parse(msg, |p| p.next_proto());

        assert_eq!(
            vec!["test_import_nested_imported_pb.proto"],
            desc.imports.into_iter().map(|i| i.path).collect::<Vec<_>>()
        );
    }

    /// A message declared inside another message is recorded as nested.
    #[test]
    fn test_nested_message() {
        let msg = r#"message A
    {
        message B {
            repeated int32 a = 1;
            optional string b = 2;
        }
        optional string b = 1;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(1, mess.messages.len());
    }

    /// A `map<string, int32>` field is parsed as a map of those two types.
    #[test]
    fn test_map() {
        let msg = r#"
        message A
    {
        optional map<string, int32> b = 1;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(1, mess.fields.len());
        match mess.fields[0].typ {
            FieldType::Map(ref f) => match &**f {
                &(FieldType::String, FieldType::Int32) => (),
                ref f => panic!("Expecting Map<String, Int32> found {:?}", f),
            },
            ref f => panic!("Expecting map, got {:?}", f),
        }
    }

    /// `@default(value=false)` produces a boolean `false` default.
    #[test]
    fn test_default_value_false() {
        let msg = r#"message Sample {
            @default(value=false)
            bool x = 1;
        }"#;

        let msg = parse_opt(msg, |p| p.next_message_opt());
        let default_val = msg.fields[0].default_value();
        assert_eq!(default_val, Some(Constant::Bool(false)));
    }

    /// `@default(value=true)` produces a boolean `true` default.
    #[test]
    fn test_default_value_true() {
        let msg = r#"
        message Sample {
            @default(value=true)
            bool x = 1;
        }"#;

        let msg = parse_opt(msg, |p| p.next_message_opt());
        let default_val = msg.fields[0].default_value();
        assert_eq!(default_val, Some(Constant::Bool(true)));
    }

    /// A non-negative integer default is reported as `Constant::U64`.
    #[test]
    fn test_default_value_int() {
        let msg = r#"message Sample {
            @default(value=17)
            int32 x = 1;
        }"#;

        let msg = parse_opt(msg, |p| p.next_message_opt());
        let default_val = msg.fields[0].default_value();
        assert_eq!(default_val, Some(Constant::U64(17)));
    }

    /// A negative integer default is reported as `Constant::I64`.
    #[test]
    fn test_default_value_int_neg() {
        let msg = r#"message Sample {
            @default(value= -33)
            int32 x = 1;
        }"#;

        let msg = parse_opt(msg, |p| p.next_message_opt());
        let default_val = msg.fields[0].default_value();
        assert_eq!(default_val, Some(Constant::I64(-33)));
    }

    /// A string default keeps its quotes and escape sequences verbatim.
    #[test]
    fn test_default_value_string() {
        let msg = r#"
        message Sample {
            @default(value = "ab\nc d\"g\'h\0\"z");
            optional string x = 1;
        }"#;

        let msg = parse_opt(msg, |p| p.next_message_opt());
        let default_val = msg.fields[0].default_value();
        assert_eq!(
            default_val,
            Some(Constant::String(r#""ab\nc d\"g\'h\0\"z""#.to_string()))
        );
    }

    /// A malformed field is reported on the line where parsing fails.
    #[test]
    fn test_incorrect_file_descriptor() {
        let msg = r#"message Foo {
            dfgdg
        }
        "#;

        let err = FileDescriptor::parse(msg).err().expect("err");
        assert_eq!(3, err.line);
    }
}