1use std::{collections::BTreeMap, str::FromStr};
7
8use crate::{
9 Error, Float, SimpleValue, Value,
10 error::WithEof,
11 float::Inner,
12 io::{MyReader, SliceReader},
13 limits, tag,
14 util::{trim_leading_zeros, u8_from_base64_digit, u8_from_hex_digit, u64_from_slice},
15};
16
17impl FromStr for Value {
18 type Err = Error;
19
20 fn from_str(s: &str) -> Result<Self, Error> {
21 let mut parser = Parser::new(SliceReader(s.as_bytes()), limits::RECURSION_LIMIT);
22 parser.parse_complete()
23 }
24}
25
26pub(crate) struct Parser<R: MyReader> {
31 reader: R,
32 buf: [u8; 16],
33 buf_len: usize,
34 depth: u16,
35}
36
37impl<R: MyReader> Parser<R> {
38 pub(crate) fn new(inner: R, recursion_limit: u16) -> Self {
39 Self {
40 reader: inner,
41 buf: [0; _],
42 buf_len: 0,
43 depth: recursion_limit,
44 }
45 }
46
47 pub(crate) fn parse_complete(&mut self) -> Result<Value, R::Error> {
51 self.skip_whitespace()?;
52 let value = self.parse_value()?;
53 self.skip_whitespace()?;
54 if !self.at_end()? {
55 Err(Error::InvalidFormat.into())
56 } else {
57 Ok(value)
58 }
59 }
60
61 pub(crate) fn parse_stream_item(&mut self) -> Result<Value, R::Error> {
68 self.skip_whitespace()?;
69 let value = self.parse_value()?;
70 self.consume_trailing_separator()?;
71 Ok(value)
72 }
73
74 pub(crate) fn parse_seq_item(&mut self) -> Result<Option<Value>, R::Error> {
79 self.skip_whitespace()?;
80 if self.at_end()? {
81 Ok(None)
82 } else {
83 let value = self.parse_value()?;
84 self.consume_trailing_separator()?;
85 Ok(Some(value))
86 }
87 }
88
89 fn consume_trailing_separator(&mut self) -> Result<(), R::Error> {
93 self.skip_whitespace()?;
94 if self.at_end()? || self.eat(b',')? {
95 Ok(())
96 } else {
97 Err(Error::InvalidFormat.into())
98 }
99 }
100
101 fn enter(&mut self) -> Result<(), R::Error> {
102 self.depth = self.depth.checked_sub(1).ok_or(Error::NestingTooDeep)?;
103 Ok(())
104 }
105
106 fn leave(&mut self) {
107 self.depth += 1;
108 }
109
110 fn ensure(&mut self, n: usize) -> Result<(), R::Error> {
111 while self.buf_len < n {
112 let [b] = self.reader.read_bytes::<1>()?;
113 self.buf[self.buf_len] = b;
114 self.buf_len += 1;
115 }
116 Ok(())
117 }
118
119 fn peek(&mut self) -> Result<Option<u8>, R::Error> {
120 self.peek_at(0)
121 }
122
123 fn peek_at(&mut self, offset: usize) -> Result<Option<u8>, R::Error> {
124 match self.ensure(offset + 1) {
125 Ok(()) => Ok(Some(self.buf[offset])),
126 Err(e) if e.is_eof() => Ok(None),
127 Err(e) => Err(e),
128 }
129 }
130
131 fn advance(&mut self) -> Result<u8, R::Error> {
132 self.ensure(1)?;
133 let byte = self.buf[0];
134 self.buf.copy_within(1..self.buf_len, 0);
135 self.buf_len -= 1;
136 Ok(byte)
137 }
138
139 fn skip(&mut self, len: usize) -> Result<(), R::Error> {
140 debug_assert!(len <= self.buf_len);
141 self.buf.copy_within(len..self.buf_len, 0);
142 self.buf_len -= len;
143 Ok(())
144 }
145
146 fn eat(&mut self, byte: u8) -> Result<bool, R::Error> {
147 if self.peek()? == Some(byte) {
148 self.skip(1)?;
149 Ok(true)
150 } else {
151 Ok(false)
152 }
153 }
154
155 fn expect(&mut self, byte: u8) -> Result<(), R::Error> {
156 if self.eat(byte)? {
157 Ok(())
158 } else {
159 Err(Error::InvalidFormat.into())
160 }
161 }
162
163 fn consume(&mut self, prefix: &[u8]) -> Result<bool, R::Error> {
165 for (i, &b) in prefix.iter().enumerate() {
166 if self.peek_at(i)? != Some(b) {
167 return Ok(false);
168 }
169 }
170 self.skip(prefix.len())?;
171 Ok(true)
172 }
173
174 fn skip_whitespace(&mut self) -> Result<(), R::Error> {
175 loop {
176 while matches!(self.peek()?, Some(b' ' | b'\t' | b'\r' | b'\n')) {
177 self.skip(1)?;
178 }
179
180 if self.eat(b'#')? {
181 while let Some(b) = self.peek()?
182 && b != b'\n'
183 {
184 self.skip(1)?;
185 }
186 } else if self.eat(b'/')? {
187 while self.advance()? != b'/' {}
188 } else {
189 return Ok(());
190 }
191 }
192 }
193
194 fn at_end(&mut self) -> Result<bool, R::Error> {
195 Ok(self.peek()?.is_none())
196 }
197
198 fn parse_value(&mut self) -> Result<Value, R::Error> {
199 self.skip_whitespace()?;
200 let byte = self.peek()?.ok_or(Error::UnexpectedEof)?;
201 match byte {
202 b'[' => self.parse_array(),
203 b'{' => self.parse_map(),
204 b'"' => self.parse_text_string(),
205 b'\'' => self.parse_single_quoted_bstr(),
206 b'<' => self.parse_embedded_bstr(),
207 b'-' => {
208 if self.consume(b"-Infinity")? {
209 Ok(Value::float(f64::NEG_INFINITY))
210 } else {
211 self.parse_number_or_tag()
212 }
213 }
214 b'0'..=b'9' => self.parse_number_or_tag(),
215 b'N' if self.consume(b"NaN")? => Ok(Value::Float(Float(Inner::F16(0x7e00)))),
216 b'I' if self.consume(b"Infinity")? => Ok(Value::float(f64::INFINITY)),
217 b't' if self.consume(b"true")? => Ok(Value::from(true)),
218 b'f' if self.consume(b"false")? => Ok(Value::from(false)),
219 b'n' if self.consume(b"null")? => Ok(Value::null()),
220 b's' if self.consume(b"simple(")? => self.parse_simple_tail(),
221 b'h' if self.consume(b"h\'")? => self.parse_hex_bstr_tail(),
222 b'b' if self.consume(b"b64'")? => self.parse_b64_bstr_tail(),
223 b'f' if self.consume(b"float'")? => self.parse_float_hex_tail(),
224 _ => Err(Error::InvalidFormat.into()),
225 }
226 }
227
228 fn parse_array(&mut self) -> Result<Value, R::Error> {
229 self.expect(b'[')?;
230 self.skip_whitespace()?;
231 let mut items = Vec::new();
232 if self.eat(b']')? {
233 Ok(Value::Array(items))
234 } else {
235 self.enter()?;
236 let result = loop {
237 items.push(self.parse_value()?);
238 self.skip_whitespace()?;
239 if self.eat(b',')? {
240 continue;
241 } else if self.eat(b']')? {
242 break Ok(Value::Array(items));
243 } else {
244 break Err(Error::InvalidFormat.into());
245 }
246 };
247 self.leave();
248 result
249 }
250 }
251
252 fn parse_map(&mut self) -> Result<Value, R::Error> {
253 self.expect(b'{')?;
254 self.skip_whitespace()?;
255 let mut map: BTreeMap<Value, Value> = BTreeMap::new();
256 if self.eat(b'}')? {
257 Ok(Value::Map(map))
258 } else {
259 self.enter()?;
260 let result = loop {
261 let key = self.parse_value()?;
262 self.skip_whitespace()?;
263 if let Err(error) = self.expect(b':') {
264 break Err(error);
265 }
266 let value = self.parse_value()?;
267 if map.insert(key, value).is_some() {
268 break Err(Error::NonDeterministic.into());
269 }
270 self.skip_whitespace()?;
271 if self.eat(b',')? {
272 continue;
273 } else if self.eat(b'}')? {
274 break Ok(Value::Map(map));
275 } else {
276 break Err(Error::InvalidFormat.into());
277 }
278 };
279 self.leave();
280 result
281 }
282 }
283
284 fn parse_number_or_tag(&mut self) -> Result<Value, R::Error> {
285 let negative = self.eat(b'-')?;
286
287 let value = if self.peek()? == Some(b'0') {
288 match self.peek_at(1)? {
289 Some(b'b' | b'B') => {
290 self.skip(2)?;
291 self.parse_integer_base(negative, 2)?
292 }
293 Some(b'o' | b'O') => {
294 self.skip(2)?;
295 self.parse_integer_base(negative, 8)?
296 }
297 Some(b'x' | b'X') => {
298 self.skip(2)?;
299 self.parse_integer_base(negative, 16)?
300 }
301 _ => self.parse_decimal(negative)?,
302 }
303 } else {
304 self.parse_decimal(negative)?
305 };
306
307 self.skip_whitespace()?;
308
309 if self.eat(b'(')? {
310 let Value::Unsigned(tag_number) = value else {
311 return Err(Error::InvalidFormat.into());
312 };
313 self.enter()?;
314 let inner = self.parse_value();
315 self.leave();
316 let inner = inner?;
317 self.skip_whitespace()?;
318 self.expect(b')')?;
319 Ok(Value::tag(tag_number, inner))
320 } else {
321 Ok(value)
322 }
323 }
324
325 fn parse_decimal(&mut self, negative: bool) -> Result<Value, R::Error> {
326 let mut int_digits: Vec<u8> = Vec::new();
327 while let Some(b) = self.peek()?
328 && b.is_ascii_digit()
329 {
330 int_digits.push(b);
331 self.skip(1)?;
332 }
333 if int_digits.is_empty() {
334 return Err(Error::InvalidFormat.into());
335 }
336 if self.peek()? == Some(b'.') {
337 let mut text: Vec<u8> = int_digits;
338 text.push(self.advance()?);
339 let frac_start = text.len();
340 while let Some(b) = self.peek()?
341 && b.is_ascii_digit()
342 {
343 text.push(b);
344 self.skip(1)?;
345 }
346 if text.len() == frac_start {
347 return Err(Error::InvalidFormat.into());
348 }
349 if matches!(self.peek()?, Some(b'e' | b'E')) {
350 text.push(self.advance()?);
351 if matches!(self.peek()?, Some(b'+' | b'-')) {
352 text.push(self.advance()?);
353 }
354 let exp_start = text.len();
355 while let Some(b) = self.peek()?
356 && b.is_ascii_digit()
357 {
358 text.push(b);
359 self.skip(1)?;
360 }
361 if text.len() == exp_start {
362 return Err(Error::InvalidFormat.into());
363 }
364 }
365 let text = std::str::from_utf8(&text).unwrap();
366 let mut parsed: f64 = text.parse().map_err(|_| Error::InvalidFormat)?;
367 if negative {
368 parsed = -parsed;
369 }
370 return Ok(Value::float(parsed));
371 }
372
373 let bytes = digits_to_be_bytes(&int_digits, 10)?;
374 Ok(be_bytes_to_value(&bytes, negative)?)
375 }
376
377 fn parse_integer_base(&mut self, negative: bool, base: u32) -> Result<Value, R::Error> {
378 let mut digits: Vec<u8> = Vec::new();
379 let mut last_was_digit = false;
380 while let Some(b) = self.peek()? {
381 if b == b'_' {
382 if !last_was_digit {
383 return Err(Error::InvalidFormat.into());
384 } else {
385 self.skip(1)?;
386 last_was_digit = false;
387 continue;
388 }
389 } else {
390 let is_valid = match base {
391 2 => matches!(b, b'0' | b'1'),
392 8 => matches!(b, b'0'..=b'7'),
393 16 => b.is_ascii_hexdigit(),
394 _ => unreachable!(),
395 };
396 if !is_valid {
397 break;
398 }
399 digits.push(b);
400 last_was_digit = true;
401 self.skip(1)?;
402 }
403 }
404 if digits.is_empty() || !last_was_digit {
405 Err(Error::InvalidFormat.into())
406 } else {
407 let bytes = digits_to_be_bytes(&digits, base)?;
408 Ok(be_bytes_to_value(&bytes, negative)?)
409 }
410 }
411
412 fn parse_simple_tail(&mut self) -> Result<Value, R::Error> {
413 self.skip_whitespace()?;
414 let mut digits: Vec<u8> = Vec::new();
415 while let Some(b) = self.peek()?
416 && b.is_ascii_digit()
417 {
418 digits.push(b);
419 self.skip(1)?;
420 }
421 if digits.is_empty() {
422 Err(Error::InvalidFormat.into())
423 } else {
424 let text = std::str::from_utf8(&digits).unwrap();
425 let number: u8 = text.parse().map_err(|_| Error::InvalidFormat)?;
426 self.skip_whitespace()?;
427 self.expect(b')')?;
428 Ok(Value::from(SimpleValue::try_from(number)?))
429 }
430 }
431
432 fn parse_float_hex_tail(&mut self) -> Result<Value, R::Error> {
433 let mut hex: Vec<u8> = Vec::new();
434 while let Some(b) = self.peek()?
435 && b != b'\''
436 {
437 hex.push(b);
438 self.skip(1)?;
439 }
440 self.expect(b'\'')?;
441 let mut bits: u64 = 0;
442 for &byte in &hex {
443 let digit = u8_from_hex_digit(byte)? as u64;
444 bits = (bits << 4) | digit;
445 }
446 match hex.len() {
447 4 => Ok(Value::Float(Float::from_bits_u16(bits as u16))),
448 8 => Ok(Value::Float(Float::from_bits_u32(bits as u32)?)),
449 16 => Ok(Value::Float(Float::from_bits_u64(bits)?)),
450 _ => Err(Error::InvalidFormat.into()),
451 }
452 }
453
454 fn parse_hex_bstr_tail(&mut self) -> Result<Value, R::Error> {
455 let mut bytes = Vec::new();
456 let mut half: Option<u8> = None;
457 loop {
458 match self.advance()? {
459 b'\'' => {
460 if half.is_some() {
461 return Err(Error::InvalidFormat.into());
462 } else {
463 return Ok(Value::ByteString(bytes));
464 }
465 }
466 b' ' | b'\t' | b'\r' | b'\n' => continue,
467 byte => {
468 let digit = u8_from_hex_digit(byte)?;
469 match half.take() {
470 None => half = Some(digit),
471 Some(high) => bytes.push((high << 4) | digit),
472 }
473 }
474 }
475 }
476 }
477
478 fn parse_b64_bstr_tail(&mut self) -> Result<Value, R::Error> {
479 let mut data: Vec<u8> = Vec::new();
480 loop {
481 match self.advance()? {
482 b'\'' => return Ok(Value::ByteString(decode_base64(&data)?)),
483 b' ' | b'\t' | b'\r' | b'\n' => continue,
484 byte => data.push(byte),
485 }
486 }
487 }
488
489 fn parse_text_string(&mut self) -> Result<Value, R::Error> {
490 self.expect(b'"')?;
491 let mut buf: Vec<u8> = Vec::new();
492 loop {
493 match self.advance()? {
494 b'"' => {
495 let text = String::try_from(buf).map_err(|_| Error::InvalidUtf8)?;
496 return Ok(Value::from(text));
497 }
498 b'\r' => {
499 self.eat(b'\n')?;
500 buf.push(b'\n');
501 }
502 b'\\' => {
503 self.read_escape_into_string(&mut buf)?;
504 }
505 byte => {
506 buf.push(byte);
507 }
508 }
509 }
510 }
511
512 fn parse_single_quoted_bstr(&mut self) -> Result<Value, R::Error> {
513 self.expect(b'\'')?;
514 let mut bytes: Vec<u8> = Vec::new();
515 loop {
516 match self.advance()? {
517 b'\'' => {
518 return Ok(Value::ByteString(bytes));
519 }
520 b'\r' => {
521 self.eat(b'\n')?;
522 bytes.push(b'\n');
523 }
524 b'\\' => {
525 self.read_escape_into_string(&mut bytes)?;
526 }
527 byte => {
528 bytes.push(byte);
529 }
530 }
531 }
532 }
533
534 fn read_escape_into_string(&mut self, out: &mut Vec<u8>) -> Result<bool, R::Error> {
538 let byte = self.advance()?;
539 let ch = match byte {
540 b'\'' => '\'',
541 b'"' => '"',
542 b'\\' => '\\',
543 b'b' => '\u{08}',
544 b'f' => '\u{0C}',
545 b'n' => '\n',
546 b'r' => '\r',
547 b't' => '\t',
548 b'u' => self.read_u_escape()?,
549 b'\n' => return Ok(false),
550 b'\r' => {
551 self.eat(b'\n')?;
552 return Ok(false);
553 }
554 _ => return Err(Error::InvalidFormat.into()),
555 };
556 let mut buf = [0; 4];
557 let s = ch.encode_utf8(&mut buf);
558 out.extend_from_slice(s.as_bytes());
559
560 Ok(true)
562 }
563
564 fn read_u_escape(&mut self) -> Result<char, R::Error> {
565 let high = self.read_4_hex()?;
566 if (0xD800..=0xDBFF).contains(&high) {
567 if !self.consume(b"\\u")? {
568 return Err(Error::InvalidFormat.into());
569 }
570 let low = self.read_4_hex()?;
571 if !(0xDC00..=0xDFFF).contains(&low) {
572 return Err(Error::InvalidFormat.into());
573 }
574 let code = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
575 char::from_u32(code).ok_or_else(|| Error::InvalidFormat.into())
576 } else if (0xDC00..=0xDFFF).contains(&high) {
577 Err(Error::InvalidFormat.into())
578 } else {
579 char::from_u32(high).ok_or_else(|| Error::InvalidFormat.into())
580 }
581 }
582
583 fn read_4_hex(&mut self) -> Result<u32, R::Error> {
584 let mut code: u32 = 0;
585 for _ in 0..4 {
586 let byte = self.advance()?;
587 let digit = u8_from_hex_digit(byte)? as u32;
588 code = (code << 4) | digit;
589 }
590 Ok(code)
591 }
592
593 fn parse_embedded_bstr(&mut self) -> Result<Value, R::Error> {
594 self.expect(b'<')?;
595 self.expect(b'<')?;
596 let mut buf = Vec::new();
597 self.skip_whitespace()?;
598 if self.consume(b">>")? {
599 Ok(Value::ByteString(buf))
600 } else {
601 self.enter()?;
602 let result = loop {
603 let value = self.parse_value()?;
604 buf.extend(value.encode());
605 self.skip_whitespace()?;
606 if self.eat(b',')? {
607 continue;
608 } else if self.consume(b">>")? {
609 break Ok(Value::ByteString(buf));
610 } else {
611 break Err(Error::InvalidFormat.into());
612 }
613 };
614 self.leave();
615 result
616 }
617 }
618}
619
620fn decode_base64(input: &[u8]) -> Result<Vec<u8>, Error> {
621 let mut data = input;
622 while let Some(stripped) = data.strip_suffix(b"=") {
623 data = stripped;
624 }
625
626 if data.len() % 4 == 1 {
627 return Err(Error::InvalidFormat);
628 }
629
630 let mut out = Vec::with_capacity(data.len() * 3 / 4);
631 let mut buf: u32 = 0;
632 let mut bits: u32 = 0;
633
634 for &byte in data {
635 let value = u8_from_base64_digit(byte)? as u32;
636 buf = (buf << 6) | value;
637 bits += 6;
638 if bits >= 8 {
639 bits -= 8;
640 out.push((buf >> bits) as u8);
641 buf &= (1 << bits) - 1;
642 }
643 }
644
645 if buf == 0 { Ok(out) } else { Err(Error::InvalidFormat) }
646}
647
648fn digits_to_be_bytes(digits: &[u8], base: u32) -> Result<Vec<u8>, Error> {
651 let mut result = vec![0u8];
652
653 for &digit in digits {
654 let value = match digit {
655 b'0'..=b'9' => (digit - b'0') as u32,
656 b'a'..=b'f' => (digit - b'a' + 10) as u32,
657 b'A'..=b'F' => (digit - b'A' + 10) as u32,
658 _ => return Err(Error::InvalidFormat),
659 };
660
661 if value >= base {
662 return Err(Error::InvalidFormat);
663 }
664
665 let mut carry = value;
666
667 for byte in result.iter_mut().rev() {
668 let product = (*byte as u32) * base + carry;
669 *byte = product as u8;
670 carry = product >> 8;
671 }
672
673 while carry > 0 {
674 result.insert(0, carry as u8);
675 carry >>= 8;
676 }
677 }
678
679 Ok(result)
680}
681
682fn be_bytes_to_value(bytes: &[u8], negative: bool) -> Result<Value, Error> {
684 let bytes = trim_leading_zeros(bytes);
685
686 if bytes.is_empty() {
687 Ok(Value::Unsigned(0))
688 } else if !negative {
689 if bytes.len() <= 8 {
690 Ok(Value::Unsigned(u64_from_slice(bytes)?))
691 } else {
692 Ok(Value::tag(tag::POS_BIG_INT, Value::from(bytes)))
693 }
694 } else {
695 let mut sub = bytes.to_vec();
696 let mut idx = sub.len();
697 loop {
698 idx -= 1;
699 if sub[idx] > 0 {
700 sub[idx] -= 1;
701 break;
702 } else {
703 sub[idx] = 0xff;
704 }
705 }
706 let sub = trim_leading_zeros(&sub);
707 if sub.len() <= 8 {
708 Ok(Value::Negative(u64_from_slice(sub)?))
709 } else {
710 Ok(Value::tag(tag::NEG_BIG_INT, Value::from(sub)))
711 }
712 }
713}