1use std::{collections::BTreeMap, str::FromStr};
7
8use crate::{
9 Error, Float, Result, SimpleValue, Value,
10 float::Inner,
11 tag,
12 util::{trim_leading_zeros, u8_from_base64_digit, u8_from_hex_digit, u64_from_slice},
13};
14
15impl FromStr for Value {
16 type Err = Error;
17
18 fn from_str(s: &str) -> Result<Self> {
19 let mut parser = Parser::new(s.as_bytes());
20 parser.skip_ws()?;
21 let value = parser.parse_value()?;
22 parser.skip_ws()?;
23 if parser.pos != parser.src.len() {
24 return Err(Error::InvalidFormat);
25 }
26 Ok(value)
27 }
28}
29
/// Byte-level cursor over the text being parsed.
struct Parser<'a> {
    /// The complete input, borrowed for the lifetime of the parser.
    src: &'a [u8],
    /// Index into `src` of the next byte to be read.
    pos: usize,
}
34
35impl<'a> Parser<'a> {
36 fn new(src: &'a [u8]) -> Self {
37 Self { src, pos: 0 }
38 }
39
40 fn peek(&self) -> Option<u8> {
41 self.src.get(self.pos).copied()
42 }
43
44 fn peek_at(&self, offset: usize) -> Option<u8> {
45 self.src.get(self.pos + offset).copied()
46 }
47
48 fn advance(&mut self) -> Result<u8> {
49 let byte = self.peek().ok_or(Error::InvalidFormat)?;
50 self.pos += 1;
51 Ok(byte)
52 }
53
54 fn eat(&mut self, byte: u8) -> bool {
55 let found = self.peek() == Some(byte);
56 if found {
57 self.pos += 1
58 }
59 found
60 }
61
62 fn expect(&mut self, byte: u8) -> Result<()> {
63 if self.eat(byte) {
64 Ok(())
65 } else {
66 Err(Error::InvalidFormat)
67 }
68 }
69
70 fn starts_with(&self, prefix: &[u8]) -> bool {
71 self.src[self.pos..].starts_with(prefix)
72 }
73
74 fn consume(&mut self, prefix: &[u8]) -> bool {
75 let found = self.starts_with(prefix);
76 if found {
77 self.pos += prefix.len();
78 }
79 found
80 }
81
82 fn skip_ws(&mut self) -> Result<()> {
83 loop {
84 match self.peek() {
85 Some(b' ' | b'\t' | b'\r' | b'\n') => self.pos += 1,
86 Some(b'#') => {
87 while let Some(b) = self.peek() {
88 self.pos += 1;
89 if b == b'\n' {
90 break;
91 }
92 }
93 }
94 Some(b'/') => {
95 self.pos += 1;
96 loop {
97 match self.peek() {
98 Some(b'/') => {
99 self.pos += 1;
100 break;
101 }
102 Some(_) => self.pos += 1,
103 None => return Err(Error::InvalidFormat),
104 }
105 }
106 }
107 _ => return Ok(()),
108 }
109 }
110 }
111
112 fn parse_value(&mut self) -> Result<Value> {
113 self.skip_ws()?;
114 let byte = self.peek().ok_or(Error::InvalidFormat)?;
115 match byte {
116 b'[' => self.parse_array(),
117 b'{' => self.parse_map(),
118 b'"' => self.parse_text_string(),
119 b'\'' => self.parse_single_quoted_bstr(),
120 b'<' => self.parse_embedded_bstr(),
121 b'-' => {
122 if self.consume(b"-Infinity") {
123 Ok(Value::float(f64::NEG_INFINITY))
124 } else {
125 self.parse_number_or_tag()
126 }
127 }
128 b'0'..=b'9' => self.parse_number_or_tag(),
129 b'N' if self.consume(b"NaN") => Ok(Value::Float(Float(Inner::F16(0x7e00)))),
130 b'I' if self.consume(b"Infinity") => Ok(Value::float(f64::INFINITY)),
131 b't' if self.consume(b"true") => Ok(Value::from(true)),
132 b'n' if self.consume(b"null") => Ok(Value::null()),
133 b's' if self.consume(b"simple(") => self.parse_simple_tail(),
134 b'h' if self.peek_at(1) == Some(b'\'') => {
135 self.pos += 2;
136 self.parse_hex_bstr_tail()
137 }
138 b'b' if self.consume(b"b64'") => self.parse_b64_bstr_tail(),
139 b'f' => {
140 if self.consume(b"false") {
141 Ok(Value::from(false))
142 } else if self.consume(b"float'") {
143 self.parse_float_hex_tail()
144 } else {
145 Err(Error::InvalidFormat)
146 }
147 }
148 _ => Err(Error::InvalidFormat),
149 }
150 }
151
152 fn parse_array(&mut self) -> Result<Value> {
153 self.expect(b'[')?;
154 self.skip_ws()?;
155 let mut items = Vec::new();
156 if self.eat(b']') {
157 Ok(Value::Array(items))
158 } else {
159 loop {
160 items.push(self.parse_value()?);
161 self.skip_ws()?;
162 if self.eat(b',') {
163 continue;
164 } else if self.eat(b']') {
165 break Ok(Value::Array(items));
166 } else {
167 break Err(Error::InvalidFormat);
168 }
169 }
170 }
171 }
172
173 fn parse_map(&mut self) -> Result<Value> {
174 self.expect(b'{')?;
175 self.skip_ws()?;
176 let mut map: BTreeMap<Value, Value> = BTreeMap::new();
177 if self.eat(b'}') {
178 Ok(Value::Map(map))
179 } else {
180 loop {
181 let key = self.parse_value()?;
182 self.skip_ws()?;
183 self.expect(b':')?;
184 let value = self.parse_value()?;
185 if map.insert(key, value).is_some() {
186 return Err(Error::NonDeterministic);
187 }
188 self.skip_ws()?;
189 if self.eat(b',') {
190 continue;
191 } else if self.eat(b'}') {
192 break Ok(Value::Map(map));
193 } else {
194 break Err(Error::InvalidFormat);
195 }
196 }
197 }
198 }
199
200 fn parse_number_or_tag(&mut self) -> Result<Value> {
201 let negative = self.eat(b'-');
202 let value = if self.peek() == Some(b'0') {
203 match self.peek_at(1) {
204 Some(b'b' | b'B') => {
205 self.pos += 2;
206 self.parse_integer_base(negative, 2)?
207 }
208 Some(b'o' | b'O') => {
209 self.pos += 2;
210 self.parse_integer_base(negative, 8)?
211 }
212 Some(b'x' | b'X') => {
213 self.pos += 2;
214 self.parse_integer_base(negative, 16)?
215 }
216 _ => self.parse_decimal(negative)?,
217 }
218 } else {
219 self.parse_decimal(negative)?
220 };
221
222 self.skip_ws()?;
223 if self.peek() == Some(b'(') {
224 self.pos += 1;
225 let Value::Unsigned(tag_number) = value else {
226 return Err(Error::InvalidFormat);
227 };
228 let inner = self.parse_value()?;
229 self.skip_ws()?;
230 self.expect(b')')?;
231 Ok(Value::tag(tag_number, inner))
232 } else {
233 Ok(value)
234 }
235 }
236
237 fn parse_decimal(&mut self, negative: bool) -> Result<Value> {
238 let start = self.pos;
239 while let Some(b) = self.peek()
240 && b.is_ascii_digit()
241 {
242 self.pos += 1;
243 }
244 if self.pos == start {
245 return Err(Error::InvalidFormat);
246 }
247 let int_end = self.pos;
248 if self.peek() == Some(b'.') {
249 self.pos += 1;
250 let frac_start = self.pos;
251 while let Some(b) = self.peek()
252 && b.is_ascii_digit()
253 {
254 self.pos += 1;
255 }
256 if self.pos == frac_start {
257 return Err(Error::InvalidFormat);
258 }
259 if matches!(self.peek(), Some(b'e' | b'E')) {
260 self.pos += 1;
261 if matches!(self.peek(), Some(b'+' | b'-')) {
262 self.pos += 1;
263 }
264 let exp_start = self.pos;
265 while let Some(b) = self.peek()
266 && b.is_ascii_digit()
267 {
268 self.pos += 1;
269 }
270 if self.pos == exp_start {
271 return Err(Error::InvalidFormat);
272 }
273 }
274 let text = std::str::from_utf8(&self.src[start..self.pos]).unwrap();
275 let mut parsed: f64 = text.parse().map_err(|_| Error::InvalidFormat)?;
276 if negative {
277 parsed = -parsed;
278 }
279 return Ok(Value::float(parsed));
280 }
281
282 let digits = &self.src[start..int_end];
283 let bytes = digits_to_be_bytes(digits, 10)?;
284 be_bytes_to_value(&bytes, negative)
285 }
286
287 fn parse_integer_base(&mut self, negative: bool, base: u32) -> Result<Value> {
288 let mut digits: Vec<u8> = Vec::new();
289 let mut last_was_digit = false;
290 while let Some(b) = self.peek() {
291 if b == b'_' {
292 if !last_was_digit {
293 return Err(Error::InvalidFormat);
294 } else {
295 self.pos += 1;
296 last_was_digit = false;
297 continue;
298 }
299 } else {
300 let is_valid = match base {
301 2 => matches!(b, b'0' | b'1'),
302 8 => matches!(b, b'0'..=b'7'),
303 16 => b.is_ascii_hexdigit(),
304 _ => unreachable!(),
305 };
306 if !is_valid {
307 break;
308 }
309 digits.push(b);
310 last_was_digit = true;
311 self.pos += 1;
312 }
313 }
314 if digits.is_empty() || !last_was_digit {
315 Err(Error::InvalidFormat)
316 } else {
317 let bytes = digits_to_be_bytes(&digits, base)?;
318 be_bytes_to_value(&bytes, negative)
319 }
320 }
321
322 fn parse_simple_tail(&mut self) -> Result<Value> {
323 self.skip_ws()?;
324 let start = self.pos;
325 while let Some(b) = self.peek()
326 && b.is_ascii_digit()
327 {
328 self.pos += 1;
329 }
330 if self.pos == start {
331 Err(Error::InvalidFormat)
332 } else {
333 let text = std::str::from_utf8(&self.src[start..self.pos]).unwrap();
334 let number: u8 = text.parse().map_err(|_| Error::InvalidFormat)?;
335 self.skip_ws()?;
336 self.expect(b')')?;
337 Ok(Value::from(SimpleValue::try_from(number)?))
338 }
339 }
340
341 fn parse_float_hex_tail(&mut self) -> Result<Value> {
342 let start = self.pos;
343 while let Some(b) = self.peek()
344 && b != b'\''
345 {
346 self.pos += 1;
347 }
348 let hex = &self.src[start..self.pos];
349 self.expect(b'\'')?;
350 let mut bits: u64 = 0;
351 for &byte in hex {
352 let digit = u8_from_hex_digit(byte)? as u64;
353 bits = (bits << 4) | digit;
354 }
355 match hex.len() {
356 4 => Ok(Value::Float(Float::from_u16(bits as u16))),
357 8 => Ok(Value::Float(Float::from_u32(bits as u32)?)),
358 16 => Ok(Value::Float(Float::from_u64(bits)?)),
359 _ => Err(Error::InvalidFormat),
360 }
361 }
362
363 fn parse_hex_bstr_tail(&mut self) -> Result<Value> {
364 let mut bytes = Vec::new();
365 let mut half: Option<u8> = None;
366 loop {
367 let byte = self.advance()?;
368 match byte {
369 b'\'' => {
370 if half.is_some() {
371 return Err(Error::InvalidFormat);
372 } else {
373 return Ok(Value::ByteString(bytes));
374 }
375 }
376 b' ' | b'\t' | b'\r' | b'\n' => continue,
377 _ => {
378 let digit = u8_from_hex_digit(byte)?;
379 match half.take() {
380 None => half = Some(digit),
381 Some(high) => bytes.push((high << 4) | digit),
382 }
383 }
384 }
385 }
386 }
387
388 fn parse_b64_bstr_tail(&mut self) -> Result<Value> {
389 let mut data: Vec<u8> = Vec::new();
390 loop {
391 let byte = self.advance()?;
392 match byte {
393 b'\'' => return Ok(Value::ByteString(decode_base64(&data)?)),
394 b' ' | b'\t' | b'\r' | b'\n' => continue,
395 _ => data.push(byte),
396 }
397 }
398 }
399
400 fn parse_text_string(&mut self) -> Result<Value> {
401 self.expect(b'"')?;
402 let mut out = String::new();
403 loop {
404 let start = self.pos;
405 while let Some(b) = self.peek()
406 && !matches!(b, b'"' | b'\\' | b'\r')
407 {
408 self.pos += 1;
409 }
410 let slice = std::str::from_utf8(&self.src[start..self.pos]).map_err(|_| Error::InvalidUtf8)?;
411 out.push_str(slice);
412 let byte = self.peek().ok_or(Error::InvalidFormat)?;
413 match byte {
414 b'"' => {
415 self.pos += 1;
416 return Ok(Value::from(out));
417 }
418 b'\r' => {
419 self.pos += 1;
420 self.eat(b'\n');
421 out.push('\n');
422 }
423 b'\\' => {
424 self.pos += 1;
425 if !self.read_escape_into_string(&mut out)? {
426 }
428 }
429 _ => unreachable!(),
430 }
431 }
432 }
433
434 fn parse_single_quoted_bstr(&mut self) -> Result<Value> {
435 self.expect(b'\'')?;
436 let mut out: Vec<u8> = Vec::new();
437 loop {
438 let start = self.pos;
439 while let Some(b) = self.peek()
440 && !matches!(b, b'\'' | b'\\' | b'\r')
441 {
442 self.pos += 1;
443 }
444 out.extend_from_slice(&self.src[start..self.pos]);
445 let byte = self.peek().ok_or(Error::InvalidFormat)?;
446 match byte {
447 b'\'' => {
448 self.pos += 1;
449 return Ok(Value::ByteString(out));
450 }
451 b'\r' => {
452 self.pos += 1;
453 self.eat(b'\n');
454 out.push(b'\n');
455 }
456 b'\\' => {
457 self.pos += 1;
458 let mut tmp = String::new();
459 if self.read_escape_into_string(&mut tmp)? {
460 out.extend_from_slice(tmp.as_bytes());
461 }
462 }
463 _ => unreachable!(),
464 }
465 }
466 }
467
468 fn read_escape_into_string(&mut self, out: &mut String) -> Result<bool> {
472 let byte = self.advance()?;
473 let ch = match byte {
474 b'\'' => '\'',
475 b'"' => '"',
476 b'\\' => '\\',
477 b'b' => '\u{08}',
478 b'f' => '\u{0C}',
479 b'n' => '\n',
480 b'r' => '\r',
481 b't' => '\t',
482 b'u' => self.read_u_escape()?,
483 b'\n' => return Ok(false),
484 b'\r' => {
485 self.eat(b'\n');
486 return Ok(false);
487 }
488 _ => return Err(Error::InvalidFormat),
489 };
490 out.push(ch);
491 Ok(true)
492 }
493
494 fn read_u_escape(&mut self) -> Result<char> {
495 let high = self.read_4_hex()?;
496 if (0xD800..=0xDBFF).contains(&high) {
497 if !self.consume(b"\\u") {
498 return Err(Error::InvalidFormat);
499 }
500 let low = self.read_4_hex()?;
501 if !(0xDC00..=0xDFFF).contains(&low) {
502 return Err(Error::InvalidFormat);
503 }
504 let code = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
505 char::from_u32(code).ok_or(Error::InvalidFormat)
506 } else if (0xDC00..=0xDFFF).contains(&high) {
507 Err(Error::InvalidFormat)
508 } else {
509 char::from_u32(high).ok_or(Error::InvalidFormat)
510 }
511 }
512
513 fn read_4_hex(&mut self) -> Result<u32> {
514 let mut code: u32 = 0;
515 for _ in 0..4 {
516 let byte = self.advance()?;
517 let digit = u8_from_hex_digit(byte)? as u32;
518 code = (code << 4) | digit;
519 }
520 Ok(code)
521 }
522
523 fn parse_embedded_bstr(&mut self) -> Result<Value> {
524 self.expect(b'<')?;
525 self.expect(b'<')?;
526 let mut buf = Vec::new();
527 self.skip_ws()?;
528 if self.consume(b">>") {
529 Ok(Value::ByteString(buf))
530 } else {
531 loop {
532 let value = self.parse_value()?;
533 buf.extend(value.encode());
534 self.skip_ws()?;
535 if self.eat(b',') {
536 continue;
537 } else if self.consume(b">>") {
538 return Ok(Value::ByteString(buf));
539 } else {
540 return Err(Error::InvalidFormat);
541 }
542 }
543 }
544 }
545}
546
547fn decode_base64(input: &[u8]) -> Result<Vec<u8>> {
548 let mut data = input;
549 while let Some(stripped) = data.strip_suffix(b"=") {
550 data = stripped;
551 }
552
553 if data.len() % 4 == 1 {
554 return Err(Error::InvalidFormat);
555 }
556
557 let mut out = Vec::with_capacity(data.len() * 3 / 4);
558 let mut buf: u32 = 0;
559 let mut bits: u32 = 0;
560
561 for &byte in data {
562 let value = u8_from_base64_digit(byte)? as u32;
563 buf = (buf << 6) | value;
564 bits += 6;
565 if bits >= 8 {
566 bits -= 8;
567 out.push((buf >> bits) as u8);
568 buf &= (1 << bits) - 1;
569 }
570 }
571
572 if buf == 0 { Ok(out) } else { Err(Error::InvalidFormat) }
573}
574
575fn digits_to_be_bytes(digits: &[u8], base: u32) -> Result<Vec<u8>> {
578 let mut result = vec![0u8];
579
580 for &digit in digits {
581 let value = match digit {
582 b'0'..=b'9' => (digit - b'0') as u32,
583 b'a'..=b'f' => (digit - b'a' + 10) as u32,
584 b'A'..=b'F' => (digit - b'A' + 10) as u32,
585 _ => return Err(Error::InvalidFormat),
586 };
587
588 if value >= base {
589 return Err(Error::InvalidFormat);
590 }
591
592 let mut carry = value;
593
594 for byte in result.iter_mut().rev() {
595 let product = (*byte as u32) * base + carry;
596 *byte = product as u8;
597 carry = product >> 8;
598 }
599
600 while carry > 0 {
601 result.insert(0, carry as u8);
602 carry >>= 8;
603 }
604 }
605
606 Ok(result)
607}
608
609fn be_bytes_to_value(bytes: &[u8], negative: bool) -> Result<Value> {
611 let bytes = trim_leading_zeros(bytes);
612
613 if bytes.is_empty() {
614 Ok(Value::Unsigned(0))
615 } else if !negative {
616 if bytes.len() <= 8 {
617 Ok(Value::Unsigned(u64_from_slice(bytes)?))
618 } else {
619 Ok(Value::tag(tag::POS_BIG_INT, Value::from(bytes)))
620 }
621 } else {
622 let mut sub = bytes.to_vec();
623 let mut idx = sub.len();
624 loop {
625 idx -= 1;
626 if sub[idx] > 0 {
627 sub[idx] -= 1;
628 break;
629 } else {
630 sub[idx] = 0xff;
631 }
632 }
633 let sub = trim_leading_zeros(&sub);
634 if sub.len() <= 8 {
635 Ok(Value::Negative(u64_from_slice(sub)?))
636 } else {
637 Ok(Value::tag(tag::NEG_BIG_INT, Value::from(sub)))
638 }
639 }
640}