prost_reflect/dynamic/text_format/parse/
mod.rs

1mod error;
2mod lex;
3
4use std::{borrow::Cow, convert::TryFrom, iter::once};
5
6use logos::{Lexer, Logos, Span};
7use prost::Message;
8
9pub use self::error::ParseError;
10
11use self::{
12    error::ParseErrorKind,
13    lex::{Int, Token},
14};
15use crate::{
16    descriptor::{MAP_ENTRY_KEY_NUMBER, MAP_ENTRY_VALUE_NUMBER},
17    dynamic::fields::FieldDescriptorLike,
18    DynamicMessage, EnumDescriptor, FieldDescriptor, Kind, MapKey, MessageDescriptor, Value,
19};
20
/// Parser for the text format that reads tokens from a `logos` lexer and fills in
/// `DynamicMessage` values.
///
/// The parser keeps at most one token of lookahead; see the `peek` field.
21pub(in crate::dynamic::text_format) struct Parser<'a> {
    /// Token source over the input string passed to `Parser::new`.
22    lexer: Lexer<'a, Token<'a>>,
    /// Single-slot lookahead buffer. It is filled by `Parser::peek` (from `Parser::next`) and
    /// emptied by `Parser::bump`. It stores the lexer's result as-is, so it may hold an error.
23    peek: Option<Result<(Token<'a>, Span), ParseErrorKind>>,
24}
25
/// The name part of a field entry, as produced by `Parser::parse_field_name`.
26enum FieldName {
    /// A bare identifier; resolved against the message descriptor with `find_field`.
27    Ident(String),
    /// A bracketed full identifier (`[a.b.c]`); looked up as an extension by its full name.
28    Extension(String),
    /// A bracketed `[domain/message.name]` pair. The first string is the part before the `/`,
    /// the second is the message name that is looked up in the descriptor pool.
29    Any(String, String),
30}
31
32impl<'a> Parser<'a> {
33    pub fn new(input: &'a str) -> Self {
34        Parser {
35            lexer: Token::lexer(input),
36            peek: None,
37        }
38    }
39
40    pub fn parse_message(&mut self, message: &mut DynamicMessage) -> Result<(), ParseErrorKind> {
41        while self.peek()?.is_some() {
42            self.parse_field(message)?;
43        }
44        Ok(())
45    }
46
47    fn parse_message_value(
48        &mut self,
49        message: &mut DynamicMessage,
50    ) -> Result<Span, ParseErrorKind> {
51        let (terminator, start) = match self.peek()? {
52            Some((Token::LeftBrace, _)) => (Token::RightBrace, self.bump()),
53            Some((Token::LeftAngleBracket, _)) => (Token::RightAngleBracket, self.bump()),
54            _ => self.unexpected_token("'{' or '<'")?,
55        };
56
57        loop {
58            match self.peek()? {
59                Some((Token::Ident(_) | Token::LeftBracket, _)) => self.parse_field(message)?,
60                Some((tok, _)) if tok == terminator => {
61                    let end = self.bump();
62                    return Ok(join_span(start, end));
63                }
64                _ => self.unexpected_token(format!("'{terminator}' or a field name"))?,
65            }
66        }
67    }
68
69    fn parse_field(&mut self, message: &mut DynamicMessage) -> Result<(), ParseErrorKind> {
70        let (name, span) = self.parse_field_name()?;
71
72        match self.peek()? {
73            Some((Token::Colon, _)) => {
74                self.bump();
75            }
76            Some((Token::LeftBrace | Token::LeftAngleBracket, _)) => (),
77            _ => self.unexpected_token("':' or a message value")?,
78        };
79
80        match name {
81            FieldName::Ident(field_name) => {
82                let field = find_field(&message.desc, &field_name).ok_or_else(|| {
83                    ParseErrorKind::FieldNotFound {
84                        field_name,
85                        message_name: message.desc.full_name().to_owned(),
86                        span,
87                    }
88                })?;
89
90                self.parse_field_value(message, &field)?;
91            }
92            FieldName::Extension(extension_name) => {
93                let extension = message
94                    .desc
95                    .get_extension_by_full_name(&extension_name)
96                    .ok_or_else(|| ParseErrorKind::ExtensionNotFound {
97                        extension_name,
98                        message_name: message.desc.full_name().to_owned(),
99                        span,
100                    })?;
101
102                self.parse_field_value(message, &extension)?;
103            }
104            FieldName::Any(domain, message_name) => {
105                let value_message = match message
106                    .desc
107                    .parent_pool()
108                    .get_message_by_name(&message_name)
109                {
110                    Some(msg) => msg,
111                    None => return Err(ParseErrorKind::MessageNotFound { message_name, span }),
112                };
113
114                let mut value = DynamicMessage::new(value_message);
115                self.parse_message_value(&mut value)?;
116
117                let type_url = format!("{domain}/{message_name}");
118                let value = value.encode_to_vec();
119
120                if !(message.desc.full_name() == "google.protobuf.Any"
121                    && message
122                        .try_set_field_by_number(1, Value::String(type_url))
123                        .is_ok()
124                    && message
125                        .try_set_field_by_number(2, Value::Bytes(value.into()))
126                        .is_ok())
127                {
128                    return Err(ParseErrorKind::InvalidTypeForAny { span });
129                }
130            }
131        }
132
133        if matches!(self.peek()?, Some((Token::Comma | Token::Semicolon, _))) {
134            self.bump();
135        }
136
137        Ok(())
138    }
139
140    fn parse_field_name(&mut self) -> Result<(FieldName, Span), ParseErrorKind> {
141        match self.peek()? {
142            Some((Token::Ident(ident), _)) => Ok((FieldName::Ident(ident.to_owned()), self.bump())),
143            Some((Token::LeftBracket, _)) => {
144                let start = self.bump();
145
146                let name_or_domain = self
147                    .parse_full_ident(&[Token::RightBracket, Token::ForwardSlash])?
148                    .into_owned();
149                match self.peek()? {
150                    Some((Token::RightBracket, _)) => {
151                        let end = self.bump();
152                        Ok((FieldName::Extension(name_or_domain), join_span(start, end)))
153                    }
154                    Some((Token::ForwardSlash, _)) => {
155                        self.bump();
156                        let type_name = self.parse_full_ident(&[Token::RightBracket])?;
157                        let end = self.expect(Token::RightBracket)?;
158                        Ok((
159                            FieldName::Any(name_or_domain, type_name.into_owned()),
160                            join_span(start, end),
161                        ))
162                    }
163                    _ => self.unexpected_token("']' or '/'")?,
164                }
165            }
166            _ => self.unexpected_token("a field name")?,
167        }
168    }
169
170    fn parse_field_value(
171        &mut self,
172        message: &mut DynamicMessage,
173        field: &impl FieldDescriptorLike,
174    ) -> Result<(), ParseErrorKind> {
175        if field.is_list() {
176            let (value, _) = self.parse_repeated_value(&field.kind())?;
177            let result = message.fields.get_mut(field).as_list_mut().unwrap();
178            if let Value::List(values) = value {
179                result.extend(values);
180            } else {
181                result.push(value);
182            }
183            Ok(())
184        } else if field.is_map() {
185            fn unpack(value: Value) -> Result<(MapKey, Value), ParseErrorKind> {
186                match value {
187                    Value::Message(msg) => {
188                        let key = msg
189                            .get_field_by_number(MAP_ENTRY_KEY_NUMBER)
190                            .unwrap()
191                            .into_owned()
192                            .into_map_key()
193                            .ok_or(ParseErrorKind::InvalidMapKey)?;
194                        let value = msg
195                            .get_field_by_number(MAP_ENTRY_VALUE_NUMBER)
196                            .unwrap()
197                            .into_owned();
198                        Ok((key, value))
199                    }
200                    _ => panic!("map entry must be message"),
201                }
202            }
203
204            let (value, _) = self.parse_repeated_value(&field.kind())?;
205            let result = message.fields.get_mut(field).as_map_mut().unwrap();
206            if let Value::List(values) = value {
207                for value in values {
208                    let (key, value) = unpack(value)?;
209                    result.insert(key, value);
210                }
211            } else {
212                let (key, value) = unpack(value)?;
213                result.insert(key, value);
214            }
215            Ok(())
216        } else {
217            let kind = field.kind();
218            let (value, span) = self.parse_value(&kind)?;
219
220            if message.fields.has(field) {
221                return Err(ParseErrorKind::FieldAlreadySet {
222                    field_name: field.text_name().to_owned(),
223                    span,
224                });
225            } else if let Some(oneof) = field.containing_oneof() {
226                for oneof_field in oneof.fields() {
227                    if message.has_field(&oneof_field) {
228                        return Err(ParseErrorKind::OneofAlreadySet {
229                            oneof_name: oneof.name().to_owned(),
230                            span,
231                        });
232                    }
233                }
234            }
235            message.fields.set(field, value);
236            Ok(())
237        }
238    }
239
240    fn parse_repeated_value(&mut self, kind: &Kind) -> Result<(Value, Span), ParseErrorKind> {
241        match self.peek()? {
242            Some((Token::LeftBracket, _)) => {
243                let start = self.bump();
244
245                let mut result = Vec::new();
246
247                // Check for empty list first
248                if let Some((Token::RightBracket, _)) = self.peek()? {
249                    let end = self.bump();
250                    return Ok((Value::List(result), join_span(start, end)));
251                }
252
253                result.push(self.parse_value(kind)?.0);
254
255                loop {
256                    match self.peek()? {
257                        Some((Token::Comma, _)) => {
258                            self.bump();
259                            result.push(self.parse_value(kind)?.0);
260                        }
261                        Some((Token::RightBracket, _)) => {
262                            let end = self.bump();
263                            return Ok((Value::List(result), join_span(start, end)));
264                        }
265                        _ => self.unexpected_token("',' or ']'")?,
266                    }
267                }
268            }
269            _ => self.parse_value(kind),
270        }
271    }
272
273    fn parse_value(&mut self, kind: &Kind) -> Result<(Value, Span), ParseErrorKind> {
274        match kind {
275            Kind::Float => {
276                let (value, span) = self.parse_float()?;
277                Ok((Value::F32(value as f32), span))
278            }
279            Kind::Double => {
280                let (value, span) = self.parse_float()?;
281                Ok((Value::F64(value), span))
282            }
283            Kind::Int32 | Kind::Sint32 | Kind::Sfixed32 => {
284                let (value, span) = self.parse_i32()?;
285                Ok((Value::I32(value), span))
286            }
287            Kind::Int64 | Kind::Sint64 | Kind::Sfixed64 => {
288                let (value, span) = self.parse_i64()?;
289                Ok((Value::I64(value), span))
290            }
291            Kind::Uint32 | Kind::Fixed32 => {
292                let (value, span) = self.parse_u32()?;
293                Ok((Value::U32(value), span))
294            }
295            Kind::Uint64 | Kind::Fixed64 => {
296                let (value, span) = self.parse_u64()?;
297                Ok((Value::U64(value), span))
298            }
299            Kind::Bool => {
300                let (value, span) = self.parse_bool()?;
301                Ok((Value::Bool(value), span))
302            }
303            Kind::String => {
304                let (value, span) = self.parse_bytes()?;
305                match String::from_utf8(value) {
306                    Ok(value) => Ok((Value::String(value), span)),
307                    Err(_) => Err(ParseErrorKind::InvalidUtf8String { span }),
308                }
309            }
310            Kind::Bytes => {
311                let (value, span) = self.parse_bytes()?;
312                Ok((Value::Bytes(value.into()), span))
313            }
314            Kind::Message(desc) => {
315                let mut message = DynamicMessage::new(desc.clone());
316                let span = self.parse_message_value(&mut message)?;
317                Ok((Value::Message(message), span))
318            }
319            Kind::Enum(desc) => {
320                let (value, span) = self.parse_enum(desc)?;
321                Ok((Value::EnumNumber(value), span))
322            }
323        }
324    }
325
326    fn parse_float(&mut self) -> Result<(f64, Span), ParseErrorKind> {
327        let (negative, start) = match self.peek()? {
328            Some((Token::Minus, _)) => (true, self.bump()),
329            Some((_, span)) => (false, span),
330            None => self.unexpected_token("a number")?,
331        };
332
333        let (value, end) = match self.peek()? {
334            Some((Token::FloatLiteral(value), _)) => (value, self.bump()),
335            Some((Token::IntLiteral(Int { value, radix: 10 }), _)) => {
336                (value.parse().unwrap(), self.bump())
337            }
338            Some((Token::Ident(value), _))
339                if value.eq_ignore_ascii_case("inf") || value.eq_ignore_ascii_case("infinity") =>
340            {
341                (f64::INFINITY, self.bump())
342            }
343            Some((Token::Ident(value), _)) if value.eq_ignore_ascii_case("nan") => {
344                (f64::NAN, self.bump())
345            }
346            _ => self.unexpected_token("a number")?,
347        };
348
349        if negative {
350            Ok((-value, join_span(start, end)))
351        } else {
352            Ok((value, join_span(start, end)))
353        }
354    }
355
356    fn parse_i32(&mut self) -> Result<(i32, Span), ParseErrorKind> {
357        let (negative, int, span) = self.parse_int()?;
358        let converted_value = if negative {
359            u32::from_str_radix(int.value, int.radix)
360                .ok()
361                .and_then(|value| {
362                    if value == (i32::MAX as u32 + 1) {
363                        Some(i32::MIN)
364                    } else {
365                        i32::try_from(value).map(|value| -value).ok()
366                    }
367                })
368        } else {
369            i32::from_str_radix(int.value, int.radix).ok()
370        };
371
372        match converted_value {
373            Some(value) => Ok((value, span)),
374            None => Err(ParseErrorKind::IntegerValueOutOfRange {
375                expected: "a signed 32-bit integer".to_owned(),
376                actual: if negative {
377                    format!("-{}", int.value)
378                } else {
379                    int.value.to_owned()
380                },
381                min: i32::MIN.to_string(),
382                max: i32::MAX.to_string(),
383                span,
384            }),
385        }
386    }
387
388    fn parse_i64(&mut self) -> Result<(i64, Span), ParseErrorKind> {
389        let (negative, int, span) = self.parse_int()?;
390        let converted_value = if negative {
391            u64::from_str_radix(int.value, int.radix)
392                .ok()
393                .and_then(|value| {
394                    if value == (i64::MAX as u64 + 1) {
395                        Some(i64::MIN)
396                    } else {
397                        i64::try_from(value).map(|value| -value).ok()
398                    }
399                })
400        } else {
401            i64::from_str_radix(int.value, int.radix).ok()
402        };
403
404        match converted_value {
405            Some(value) => Ok((value, span)),
406            None => Err(ParseErrorKind::IntegerValueOutOfRange {
407                expected: "a signed 64-bit integer".to_owned(),
408                actual: if negative {
409                    format!("-{}", int.value)
410                } else {
411                    int.value.to_owned()
412                },
413                min: i64::MIN.to_string(),
414                max: i64::MAX.to_string(),
415                span,
416            }),
417        }
418    }
419
420    fn parse_u32(&mut self) -> Result<(u32, Span), ParseErrorKind> {
421        let (negative, int, span) = self.parse_int()?;
422        let converted_value = if negative {
423            None
424        } else {
425            u32::from_str_radix(int.value, int.radix).ok()
426        };
427
428        match converted_value {
429            Some(value) => Ok((value, span)),
430            None => Err(ParseErrorKind::IntegerValueOutOfRange {
431                expected: "an unsigned 32-bit integer".to_owned(),
432                actual: if negative {
433                    format!("-{}", int.value)
434                } else {
435                    int.value.to_string()
436                },
437                min: u32::MIN.to_string(),
438                max: u32::MAX.to_string(),
439                span,
440            }),
441        }
442    }
443
444    fn parse_u64(&mut self) -> Result<(u64, Span), ParseErrorKind> {
445        let (negative, int, span) = self.parse_int()?;
446        let converted_value = if negative {
447            None
448        } else {
449            u64::from_str_radix(int.value, int.radix).ok()
450        };
451
452        match converted_value {
453            Some(value) => Ok((value, span)),
454            None => Err(ParseErrorKind::IntegerValueOutOfRange {
455                expected: "an unsigned 64-bit integer".to_owned(),
456                actual: if negative {
457                    format!("-{}", int.value)
458                } else {
459                    int.value.to_string()
460                },
461                min: u64::MIN.to_string(),
462                max: u64::MAX.to_string(),
463                span,
464            }),
465        }
466    }
467
468    fn parse_int(&mut self) -> Result<(bool, Int<'a>, Span), ParseErrorKind> {
469        let (negative, start) = match self.peek()? {
470            Some((Token::Minus, _)) => (true, self.bump()),
471            Some((_, span)) => (false, span),
472            None => self.unexpected_token("an integer")?,
473        };
474
475        let (value, end) = match self.peek()? {
476            Some((Token::IntLiteral(value), _)) => (value, self.bump()),
477            _ => self.unexpected_token("an integer")?,
478        };
479
480        Ok((negative, value, join_span(start, end)))
481    }
482
483    fn parse_bool(&mut self) -> Result<(bool, Span), ParseErrorKind> {
484        match self.peek()? {
485            Some((Token::Ident("false"), _))
486            | Some((Token::Ident("False"), _))
487            | Some((Token::Ident("f"), _)) => Ok((false, self.bump())),
488            Some((Token::Ident("true"), _))
489            | Some((Token::Ident("True"), _))
490            | Some((Token::Ident("t"), _)) => Ok((true, self.bump())),
491            Some((Token::IntLiteral(v), _)) => {
492                let value = match u8::from_str_radix(v.value, v.radix) {
493                    Ok(v) => v,
494                    Err(_e) => return self.unexpected_token("0 or 1"),
495                };
496                if value == 1 {
497                    Ok((true, self.bump()))
498                } else if value == 0 {
499                    Ok((false, self.bump()))
500                } else {
501                    self.unexpected_token("0 or 1")
502                }
503            }
504            _ => self.unexpected_token("'true' or 'false'"),
505        }
506    }
507
508    fn parse_bytes(&mut self) -> Result<(Vec<u8>, Span), ParseErrorKind> {
509        let (mut result, mut span) = match self.peek()? {
510            Some((Token::StringLiteral(value), _)) => (value, self.bump()),
511            _ => self.unexpected_token("a string")?,
512        };
513
514        while let Some((Token::StringLiteral(value), _)) = self.peek()? {
515            result.extend_from_slice(&value);
516            span = join_span(span, self.bump());
517        }
518
519        Ok((result, span))
520    }
521
522    fn parse_enum(&mut self, desc: &EnumDescriptor) -> Result<(i32, Span), ParseErrorKind> {
523        match self.peek()? {
524            Some((Token::Ident(name), _)) => {
525                let span = self.bump();
526                if let Some(value) = desc.get_value_by_name(name) {
527                    Ok((value.number(), span))
528                } else {
529                    Err(ParseErrorKind::EnumValueNotFound {
530                        value_name: name.to_owned(),
531                        enum_name: desc.full_name().to_owned(),
532                        span,
533                    })
534                }
535            }
536            Some((Token::Minus | Token::IntLiteral(_), _)) => self.parse_i32(),
537            _ => self.unexpected_token("an enum value")?,
538        }
539    }
540
541    fn parse_full_ident(&mut self, terminators: &[Token]) -> Result<Cow<'a, str>, ParseErrorKind> {
542        let mut result = match self.peek()? {
543            Some((Token::Ident(ident), _)) => Cow::Borrowed(ident),
544            _ => self.unexpected_token("an identifier")?,
545        };
546        self.bump();
547
548        loop {
549            match self.peek()? {
550                Some((Token::Dot, _)) => {
551                    self.bump();
552                }
553                Some((tok, _)) if terminators.contains(&tok) => return Ok(result),
554                _ => self.unexpected_token(fmt_expected(
555                    once(Token::Dot).chain(terminators.iter().cloned()),
556                ))?,
557            }
558
559            match self.peek()? {
560                Some((Token::Ident(ident), _)) => {
561                    let result = result.to_mut();
562                    result.push('.');
563                    result.push_str(ident);
564                    self.bump();
565                }
566                _ => self.unexpected_token("an identifier")?,
567            };
568        }
569    }
570
571    fn expect(&mut self, expected: Token) -> Result<Span, ParseErrorKind> {
572        if let Some((tok, _)) = self.peek()? {
573            if tok == expected {
574                return Ok(self.bump());
575            }
576        };
577
578        self.unexpected_token(expected)?
579    }
580
581    fn bump(&mut self) -> Span {
582        let (_, span) = self
583            .peek
584            .take()
585            .expect("called bump without peek returning Some()")
586            .expect("called bump on invalid token");
587        span
588    }
589
590    fn peek(&mut self) -> Result<Option<(Token<'a>, Span)>, ParseErrorKind> {
591        if self.peek.is_none() {
592            self.peek = self.next();
593        }
594        self.peek.clone().transpose()
595    }
596
597    fn next(&mut self) -> Option<Result<(Token<'a>, Span), ParseErrorKind>> {
598        debug_assert!(self.peek.is_none());
599        match self.lexer.next() {
600            Some(Err(())) => Some(Err(self.lexer.extras.error.take().unwrap_or_else(|| {
601                ParseErrorKind::InvalidToken {
602                    span: self.lexer.span(),
603                }
604            }))),
605            Some(Ok(tok)) => Some(Ok((tok, self.lexer.span()))),
606            None => None,
607        }
608    }
609
610    fn unexpected_token<T>(&mut self, expected: impl ToString) -> Result<T, ParseErrorKind> {
611        match self.peek()? {
612            Some((found, span)) => Err(ParseErrorKind::UnexpectedToken {
613                expected: expected.to_string(),
614                found: found.to_string(),
615                span,
616            }),
617            None => Err(ParseErrorKind::UnexpectedEof {
618                expected: expected.to_string(),
619            }),
620        }
621    }
622}
623
624fn find_field(desc: &MessageDescriptor, name: &str) -> Option<FieldDescriptor> {
625    if let Some(field) = desc.get_field_by_name(name) {
626        if !field.is_group() {
627            return Some(field);
628        }
629    }
630
631    if let Some(field) = desc.get_field_by_name(&name.to_ascii_lowercase()) {
632        if field.is_group() && name == field.kind().as_message().unwrap().name() {
633            return Some(field);
634        }
635    }
636
637    None
638}
639
640fn fmt_expected<'a>(ts: impl Iterator<Item = Token<'a>>) -> String {
641    use std::fmt::Write;
642
643    let ts: Vec<_> = ts.collect();
644
645    let mut s = String::with_capacity(32);
646    write!(s, "'{}'", ts[0]).unwrap();
647    if ts.len() > 1 {
648        for t in &ts[1..][..ts.len() - 2] {
649            s.push_str(", ");
650            write!(s, "'{t}'").unwrap();
651        }
652        s.push_str(" or ");
653        write!(s, "'{}'", ts[ts.len() - 1]).unwrap();
654    }
655    s
656}
657
658fn join_span(start: Span, end: Span) -> Span {
659    start.start..end.end
660}