1mod error;
2mod lex;
3
4use std::{borrow::Cow, convert::TryFrom, iter::once};
5
6use logos::{Lexer, Logos, Span};
7use prost::Message;
8
9pub use self::error::ParseError;
10
11use self::{
12 error::ParseErrorKind,
13 lex::{Int, Token},
14};
15use crate::{
16 descriptor::{MAP_ENTRY_KEY_NUMBER, MAP_ENTRY_VALUE_NUMBER},
17 dynamic::fields::FieldDescriptorLike,
18 DynamicMessage, EnumDescriptor, FieldDescriptor, Kind, MapKey, MessageDescriptor, Value,
19};
20
21pub(in crate::dynamic::text_format) struct Parser<'a> {
22 lexer: Lexer<'a, Token<'a>>,
23 peek: Option<Result<(Token<'a>, Span), ParseErrorKind>>,
24}
25
/// The three syntactic forms a field name can take.
enum FieldName {
    /// A bare identifier, e.g. `foo`.
    Ident(String),
    /// A bracketed dotted name without a slash, e.g. `[pkg.ext]`.
    Extension(String),
    /// A bracketed `domain/type.name` pair; holds the part before the slash
    /// and the dotted name after it.
    Any(String, String),
}
31
32impl<'a> Parser<'a> {
33 pub fn new(input: &'a str) -> Self {
34 Parser {
35 lexer: Token::lexer(input),
36 peek: None,
37 }
38 }
39
40 pub fn parse_message(&mut self, message: &mut DynamicMessage) -> Result<(), ParseErrorKind> {
41 while self.peek()?.is_some() {
42 self.parse_field(message)?;
43 }
44 Ok(())
45 }
46
47 fn parse_message_value(
48 &mut self,
49 message: &mut DynamicMessage,
50 ) -> Result<Span, ParseErrorKind> {
51 let (terminator, start) = match self.peek()? {
52 Some((Token::LeftBrace, _)) => (Token::RightBrace, self.bump()),
53 Some((Token::LeftAngleBracket, _)) => (Token::RightAngleBracket, self.bump()),
54 _ => self.unexpected_token("'{' or '<'")?,
55 };
56
57 loop {
58 match self.peek()? {
59 Some((Token::Ident(_) | Token::LeftBracket, _)) => self.parse_field(message)?,
60 Some((tok, _)) if tok == terminator => {
61 let end = self.bump();
62 return Ok(join_span(start, end));
63 }
64 _ => self.unexpected_token(format!("'{terminator}' or a field name"))?,
65 }
66 }
67 }
68
69 fn parse_field(&mut self, message: &mut DynamicMessage) -> Result<(), ParseErrorKind> {
70 let (name, span) = self.parse_field_name()?;
71
72 match self.peek()? {
73 Some((Token::Colon, _)) => {
74 self.bump();
75 }
76 Some((Token::LeftBrace | Token::LeftAngleBracket, _)) => (),
77 _ => self.unexpected_token("':' or a message value")?,
78 };
79
80 match name {
81 FieldName::Ident(field_name) => {
82 let field = find_field(&message.desc, &field_name).ok_or_else(|| {
83 ParseErrorKind::FieldNotFound {
84 field_name,
85 message_name: message.desc.full_name().to_owned(),
86 span,
87 }
88 })?;
89
90 self.parse_field_value(message, &field)?;
91 }
92 FieldName::Extension(extension_name) => {
93 let extension = message
94 .desc
95 .get_extension_by_full_name(&extension_name)
96 .ok_or_else(|| ParseErrorKind::ExtensionNotFound {
97 extension_name,
98 message_name: message.desc.full_name().to_owned(),
99 span,
100 })?;
101
102 self.parse_field_value(message, &extension)?;
103 }
104 FieldName::Any(domain, message_name) => {
105 let value_message = match message
106 .desc
107 .parent_pool()
108 .get_message_by_name(&message_name)
109 {
110 Some(msg) => msg,
111 None => return Err(ParseErrorKind::MessageNotFound { message_name, span }),
112 };
113
114 let mut value = DynamicMessage::new(value_message);
115 self.parse_message_value(&mut value)?;
116
117 let type_url = format!("{domain}/{message_name}");
118 let value = value.encode_to_vec();
119
120 if !(message.desc.full_name() == "google.protobuf.Any"
121 && message
122 .try_set_field_by_number(1, Value::String(type_url))
123 .is_ok()
124 && message
125 .try_set_field_by_number(2, Value::Bytes(value.into()))
126 .is_ok())
127 {
128 return Err(ParseErrorKind::InvalidTypeForAny { span });
129 }
130 }
131 }
132
133 if matches!(self.peek()?, Some((Token::Comma | Token::Semicolon, _))) {
134 self.bump();
135 }
136
137 Ok(())
138 }
139
140 fn parse_field_name(&mut self) -> Result<(FieldName, Span), ParseErrorKind> {
141 match self.peek()? {
142 Some((Token::Ident(ident), _)) => Ok((FieldName::Ident(ident.to_owned()), self.bump())),
143 Some((Token::LeftBracket, _)) => {
144 let start = self.bump();
145
146 let name_or_domain = self
147 .parse_full_ident(&[Token::RightBracket, Token::ForwardSlash])?
148 .into_owned();
149 match self.peek()? {
150 Some((Token::RightBracket, _)) => {
151 let end = self.bump();
152 Ok((FieldName::Extension(name_or_domain), join_span(start, end)))
153 }
154 Some((Token::ForwardSlash, _)) => {
155 self.bump();
156 let type_name = self.parse_full_ident(&[Token::RightBracket])?;
157 let end = self.expect(Token::RightBracket)?;
158 Ok((
159 FieldName::Any(name_or_domain, type_name.into_owned()),
160 join_span(start, end),
161 ))
162 }
163 _ => self.unexpected_token("']' or '/'")?,
164 }
165 }
166 _ => self.unexpected_token("a field name")?,
167 }
168 }
169
170 fn parse_field_value(
171 &mut self,
172 message: &mut DynamicMessage,
173 field: &impl FieldDescriptorLike,
174 ) -> Result<(), ParseErrorKind> {
175 if field.is_list() {
176 let (value, _) = self.parse_repeated_value(&field.kind())?;
177 let result = message.fields.get_mut(field).as_list_mut().unwrap();
178 if let Value::List(values) = value {
179 result.extend(values);
180 } else {
181 result.push(value);
182 }
183 Ok(())
184 } else if field.is_map() {
185 fn unpack(value: Value) -> Result<(MapKey, Value), ParseErrorKind> {
186 match value {
187 Value::Message(msg) => {
188 let key = msg
189 .get_field_by_number(MAP_ENTRY_KEY_NUMBER)
190 .unwrap()
191 .into_owned()
192 .into_map_key()
193 .ok_or(ParseErrorKind::InvalidMapKey)?;
194 let value = msg
195 .get_field_by_number(MAP_ENTRY_VALUE_NUMBER)
196 .unwrap()
197 .into_owned();
198 Ok((key, value))
199 }
200 _ => panic!("map entry must be message"),
201 }
202 }
203
204 let (value, _) = self.parse_repeated_value(&field.kind())?;
205 let result = message.fields.get_mut(field).as_map_mut().unwrap();
206 if let Value::List(values) = value {
207 for value in values {
208 let (key, value) = unpack(value)?;
209 result.insert(key, value);
210 }
211 } else {
212 let (key, value) = unpack(value)?;
213 result.insert(key, value);
214 }
215 Ok(())
216 } else {
217 let kind = field.kind();
218 let (value, span) = self.parse_value(&kind)?;
219
220 if message.fields.has(field) {
221 return Err(ParseErrorKind::FieldAlreadySet {
222 field_name: field.text_name().to_owned(),
223 span,
224 });
225 } else if let Some(oneof) = field.containing_oneof() {
226 for oneof_field in oneof.fields() {
227 if message.has_field(&oneof_field) {
228 return Err(ParseErrorKind::OneofAlreadySet {
229 oneof_name: oneof.name().to_owned(),
230 span,
231 });
232 }
233 }
234 }
235 message.fields.set(field, value);
236 Ok(())
237 }
238 }
239
240 fn parse_repeated_value(&mut self, kind: &Kind) -> Result<(Value, Span), ParseErrorKind> {
241 match self.peek()? {
242 Some((Token::LeftBracket, _)) => {
243 let start = self.bump();
244
245 let mut result = Vec::new();
246
247 if let Some((Token::RightBracket, _)) = self.peek()? {
249 let end = self.bump();
250 return Ok((Value::List(result), join_span(start, end)));
251 }
252
253 result.push(self.parse_value(kind)?.0);
254
255 loop {
256 match self.peek()? {
257 Some((Token::Comma, _)) => {
258 self.bump();
259 result.push(self.parse_value(kind)?.0);
260 }
261 Some((Token::RightBracket, _)) => {
262 let end = self.bump();
263 return Ok((Value::List(result), join_span(start, end)));
264 }
265 _ => self.unexpected_token("',' or ']'")?,
266 }
267 }
268 }
269 _ => self.parse_value(kind),
270 }
271 }
272
273 fn parse_value(&mut self, kind: &Kind) -> Result<(Value, Span), ParseErrorKind> {
274 match kind {
275 Kind::Float => {
276 let (value, span) = self.parse_float()?;
277 Ok((Value::F32(value as f32), span))
278 }
279 Kind::Double => {
280 let (value, span) = self.parse_float()?;
281 Ok((Value::F64(value), span))
282 }
283 Kind::Int32 | Kind::Sint32 | Kind::Sfixed32 => {
284 let (value, span) = self.parse_i32()?;
285 Ok((Value::I32(value), span))
286 }
287 Kind::Int64 | Kind::Sint64 | Kind::Sfixed64 => {
288 let (value, span) = self.parse_i64()?;
289 Ok((Value::I64(value), span))
290 }
291 Kind::Uint32 | Kind::Fixed32 => {
292 let (value, span) = self.parse_u32()?;
293 Ok((Value::U32(value), span))
294 }
295 Kind::Uint64 | Kind::Fixed64 => {
296 let (value, span) = self.parse_u64()?;
297 Ok((Value::U64(value), span))
298 }
299 Kind::Bool => {
300 let (value, span) = self.parse_bool()?;
301 Ok((Value::Bool(value), span))
302 }
303 Kind::String => {
304 let (value, span) = self.parse_bytes()?;
305 match String::from_utf8(value) {
306 Ok(value) => Ok((Value::String(value), span)),
307 Err(_) => Err(ParseErrorKind::InvalidUtf8String { span }),
308 }
309 }
310 Kind::Bytes => {
311 let (value, span) = self.parse_bytes()?;
312 Ok((Value::Bytes(value.into()), span))
313 }
314 Kind::Message(desc) => {
315 let mut message = DynamicMessage::new(desc.clone());
316 let span = self.parse_message_value(&mut message)?;
317 Ok((Value::Message(message), span))
318 }
319 Kind::Enum(desc) => {
320 let (value, span) = self.parse_enum(desc)?;
321 Ok((Value::EnumNumber(value), span))
322 }
323 }
324 }
325
326 fn parse_float(&mut self) -> Result<(f64, Span), ParseErrorKind> {
327 let (negative, start) = match self.peek()? {
328 Some((Token::Minus, _)) => (true, self.bump()),
329 Some((_, span)) => (false, span),
330 None => self.unexpected_token("a number")?,
331 };
332
333 let (value, end) = match self.peek()? {
334 Some((Token::FloatLiteral(value), _)) => (value, self.bump()),
335 Some((Token::IntLiteral(Int { value, radix: 10 }), _)) => {
336 (value.parse().unwrap(), self.bump())
337 }
338 Some((Token::Ident(value), _))
339 if value.eq_ignore_ascii_case("inf") || value.eq_ignore_ascii_case("infinity") =>
340 {
341 (f64::INFINITY, self.bump())
342 }
343 Some((Token::Ident(value), _)) if value.eq_ignore_ascii_case("nan") => {
344 (f64::NAN, self.bump())
345 }
346 _ => self.unexpected_token("a number")?,
347 };
348
349 if negative {
350 Ok((-value, join_span(start, end)))
351 } else {
352 Ok((value, join_span(start, end)))
353 }
354 }
355
356 fn parse_i32(&mut self) -> Result<(i32, Span), ParseErrorKind> {
357 let (negative, int, span) = self.parse_int()?;
358 let converted_value = if negative {
359 u32::from_str_radix(int.value, int.radix)
360 .ok()
361 .and_then(|value| {
362 if value == (i32::MAX as u32 + 1) {
363 Some(i32::MIN)
364 } else {
365 i32::try_from(value).map(|value| -value).ok()
366 }
367 })
368 } else {
369 i32::from_str_radix(int.value, int.radix).ok()
370 };
371
372 match converted_value {
373 Some(value) => Ok((value, span)),
374 None => Err(ParseErrorKind::IntegerValueOutOfRange {
375 expected: "a signed 32-bit integer".to_owned(),
376 actual: if negative {
377 format!("-{}", int.value)
378 } else {
379 int.value.to_owned()
380 },
381 min: i32::MIN.to_string(),
382 max: i32::MAX.to_string(),
383 span,
384 }),
385 }
386 }
387
388 fn parse_i64(&mut self) -> Result<(i64, Span), ParseErrorKind> {
389 let (negative, int, span) = self.parse_int()?;
390 let converted_value = if negative {
391 u64::from_str_radix(int.value, int.radix)
392 .ok()
393 .and_then(|value| {
394 if value == (i64::MAX as u64 + 1) {
395 Some(i64::MIN)
396 } else {
397 i64::try_from(value).map(|value| -value).ok()
398 }
399 })
400 } else {
401 i64::from_str_radix(int.value, int.radix).ok()
402 };
403
404 match converted_value {
405 Some(value) => Ok((value, span)),
406 None => Err(ParseErrorKind::IntegerValueOutOfRange {
407 expected: "a signed 64-bit integer".to_owned(),
408 actual: if negative {
409 format!("-{}", int.value)
410 } else {
411 int.value.to_owned()
412 },
413 min: i64::MIN.to_string(),
414 max: i64::MAX.to_string(),
415 span,
416 }),
417 }
418 }
419
420 fn parse_u32(&mut self) -> Result<(u32, Span), ParseErrorKind> {
421 let (negative, int, span) = self.parse_int()?;
422 let converted_value = if negative {
423 None
424 } else {
425 u32::from_str_radix(int.value, int.radix).ok()
426 };
427
428 match converted_value {
429 Some(value) => Ok((value, span)),
430 None => Err(ParseErrorKind::IntegerValueOutOfRange {
431 expected: "an unsigned 32-bit integer".to_owned(),
432 actual: if negative {
433 format!("-{}", int.value)
434 } else {
435 int.value.to_string()
436 },
437 min: u32::MIN.to_string(),
438 max: u32::MAX.to_string(),
439 span,
440 }),
441 }
442 }
443
444 fn parse_u64(&mut self) -> Result<(u64, Span), ParseErrorKind> {
445 let (negative, int, span) = self.parse_int()?;
446 let converted_value = if negative {
447 None
448 } else {
449 u64::from_str_radix(int.value, int.radix).ok()
450 };
451
452 match converted_value {
453 Some(value) => Ok((value, span)),
454 None => Err(ParseErrorKind::IntegerValueOutOfRange {
455 expected: "an unsigned 64-bit integer".to_owned(),
456 actual: if negative {
457 format!("-{}", int.value)
458 } else {
459 int.value.to_string()
460 },
461 min: u64::MIN.to_string(),
462 max: u64::MAX.to_string(),
463 span,
464 }),
465 }
466 }
467
468 fn parse_int(&mut self) -> Result<(bool, Int<'a>, Span), ParseErrorKind> {
469 let (negative, start) = match self.peek()? {
470 Some((Token::Minus, _)) => (true, self.bump()),
471 Some((_, span)) => (false, span),
472 None => self.unexpected_token("an integer")?,
473 };
474
475 let (value, end) = match self.peek()? {
476 Some((Token::IntLiteral(value), _)) => (value, self.bump()),
477 _ => self.unexpected_token("an integer")?,
478 };
479
480 Ok((negative, value, join_span(start, end)))
481 }
482
483 fn parse_bool(&mut self) -> Result<(bool, Span), ParseErrorKind> {
484 match self.peek()? {
485 Some((Token::Ident("false"), _))
486 | Some((Token::Ident("False"), _))
487 | Some((Token::Ident("f"), _)) => Ok((false, self.bump())),
488 Some((Token::Ident("true"), _))
489 | Some((Token::Ident("True"), _))
490 | Some((Token::Ident("t"), _)) => Ok((true, self.bump())),
491 Some((Token::IntLiteral(v), _)) => {
492 let value = match u8::from_str_radix(v.value, v.radix) {
493 Ok(v) => v,
494 Err(_e) => return self.unexpected_token("0 or 1"),
495 };
496 if value == 1 {
497 Ok((true, self.bump()))
498 } else if value == 0 {
499 Ok((false, self.bump()))
500 } else {
501 self.unexpected_token("0 or 1")
502 }
503 }
504 _ => self.unexpected_token("'true' or 'false'"),
505 }
506 }
507
508 fn parse_bytes(&mut self) -> Result<(Vec<u8>, Span), ParseErrorKind> {
509 let (mut result, mut span) = match self.peek()? {
510 Some((Token::StringLiteral(value), _)) => (value, self.bump()),
511 _ => self.unexpected_token("a string")?,
512 };
513
514 while let Some((Token::StringLiteral(value), _)) = self.peek()? {
515 result.extend_from_slice(&value);
516 span = join_span(span, self.bump());
517 }
518
519 Ok((result, span))
520 }
521
522 fn parse_enum(&mut self, desc: &EnumDescriptor) -> Result<(i32, Span), ParseErrorKind> {
523 match self.peek()? {
524 Some((Token::Ident(name), _)) => {
525 let span = self.bump();
526 if let Some(value) = desc.get_value_by_name(name) {
527 Ok((value.number(), span))
528 } else {
529 Err(ParseErrorKind::EnumValueNotFound {
530 value_name: name.to_owned(),
531 enum_name: desc.full_name().to_owned(),
532 span,
533 })
534 }
535 }
536 Some((Token::Minus | Token::IntLiteral(_), _)) => self.parse_i32(),
537 _ => self.unexpected_token("an enum value")?,
538 }
539 }
540
541 fn parse_full_ident(&mut self, terminators: &[Token]) -> Result<Cow<'a, str>, ParseErrorKind> {
542 let mut result = match self.peek()? {
543 Some((Token::Ident(ident), _)) => Cow::Borrowed(ident),
544 _ => self.unexpected_token("an identifier")?,
545 };
546 self.bump();
547
548 loop {
549 match self.peek()? {
550 Some((Token::Dot, _)) => {
551 self.bump();
552 }
553 Some((tok, _)) if terminators.contains(&tok) => return Ok(result),
554 _ => self.unexpected_token(fmt_expected(
555 once(Token::Dot).chain(terminators.iter().cloned()),
556 ))?,
557 }
558
559 match self.peek()? {
560 Some((Token::Ident(ident), _)) => {
561 let result = result.to_mut();
562 result.push('.');
563 result.push_str(ident);
564 self.bump();
565 }
566 _ => self.unexpected_token("an identifier")?,
567 };
568 }
569 }
570
571 fn expect(&mut self, expected: Token) -> Result<Span, ParseErrorKind> {
572 if let Some((tok, _)) = self.peek()? {
573 if tok == expected {
574 return Ok(self.bump());
575 }
576 };
577
578 self.unexpected_token(expected)?
579 }
580
581 fn bump(&mut self) -> Span {
582 let (_, span) = self
583 .peek
584 .take()
585 .expect("called bump without peek returning Some()")
586 .expect("called bump on invalid token");
587 span
588 }
589
590 fn peek(&mut self) -> Result<Option<(Token<'a>, Span)>, ParseErrorKind> {
591 if self.peek.is_none() {
592 self.peek = self.next();
593 }
594 self.peek.clone().transpose()
595 }
596
597 fn next(&mut self) -> Option<Result<(Token<'a>, Span), ParseErrorKind>> {
598 debug_assert!(self.peek.is_none());
599 match self.lexer.next() {
600 Some(Err(())) => Some(Err(self.lexer.extras.error.take().unwrap_or_else(|| {
601 ParseErrorKind::InvalidToken {
602 span: self.lexer.span(),
603 }
604 }))),
605 Some(Ok(tok)) => Some(Ok((tok, self.lexer.span()))),
606 None => None,
607 }
608 }
609
610 fn unexpected_token<T>(&mut self, expected: impl ToString) -> Result<T, ParseErrorKind> {
611 match self.peek()? {
612 Some((found, span)) => Err(ParseErrorKind::UnexpectedToken {
613 expected: expected.to_string(),
614 found: found.to_string(),
615 span,
616 }),
617 None => Err(ParseErrorKind::UnexpectedEof {
618 expected: expected.to_string(),
619 }),
620 }
621 }
622}
623
624fn find_field(desc: &MessageDescriptor, name: &str) -> Option<FieldDescriptor> {
625 if let Some(field) = desc.get_field_by_name(name) {
626 if !field.is_group() {
627 return Some(field);
628 }
629 }
630
631 if let Some(field) = desc.get_field_by_name(&name.to_ascii_lowercase()) {
632 if field.is_group() && name == field.kind().as_message().unwrap().name() {
633 return Some(field);
634 }
635 }
636
637 None
638}
639
640fn fmt_expected<'a>(ts: impl Iterator<Item = Token<'a>>) -> String {
641 use std::fmt::Write;
642
643 let ts: Vec<_> = ts.collect();
644
645 let mut s = String::with_capacity(32);
646 write!(s, "'{}'", ts[0]).unwrap();
647 if ts.len() > 1 {
648 for t in &ts[1..][..ts.len() - 2] {
649 s.push_str(", ");
650 write!(s, "'{t}'").unwrap();
651 }
652 s.push_str(" or ");
653 write!(s, "'{}'", ts[ts.len() - 1]).unwrap();
654 }
655 s
656}
657
658fn join_span(start: Span, end: Span) -> Span {
659 start.start..end.end
660}