1use std::char;
6use std::io;
7use std::marker::PhantomData;
8use std::str;
9
10use serde::de;
11
12use super::error::{Error, ErrorCode, Result};
13use super::util::StringReader;
14use super::util::{Number, ParseNumber};
15
/// Parsing mode consulted by `Deserializer::parse_value` to decide how the
/// next value is interpreted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Parse an ordinary value (string, list, map, literal, number).
    Normal,
    /// Parse the document root as the body of a map without enclosing braces.
    Root,
    /// Parse an unquoted key name.
    Keyname,
}
21
22pub struct Deserializer<Iter: Iterator<Item = u8>> {
24 rdr: StringReader<Iter>,
25 str_buf: Vec<u8>,
26 state: State,
27}
28
29impl<Iter> Deserializer<Iter>
39where
40 Iter: Iterator<Item = u8>,
41{
42 #[inline]
44 pub fn new(rdr: Iter) -> Deserializer<Iter> {
45 Deserializer {
46 rdr: StringReader::new(rdr),
47 str_buf: Vec::with_capacity(128),
48 state: State::Normal,
49 }
50 }
51
52 #[inline]
54 pub fn new_for_root(rdr: Iter) -> Deserializer<Iter> {
55 let mut res = Deserializer::new(rdr);
56 res.state = State::Root;
57 res
58 }
59
60 #[inline]
64 pub fn end(&mut self) -> Result<()> {
65 self.rdr.parse_whitespace()?;
66 if self.rdr.eof()? {
67 Ok(())
68 } else {
69 Err(self.rdr.error(ErrorCode::TrailingCharacters))
70 }
71 }
72
/// Returns `true` when `ch` is one of the structural bytes `{ } [ ] , :`.
fn is_punctuator_char(ch: u8) -> bool {
    matches!(ch, b'{' | b'}' | b'[' | b']' | b',' | b':')
}
79
80 fn parse_keyname<'de, V>(&mut self, visitor: V) -> Result<V::Value>
81 where
82 V: de::Visitor<'de>,
83 {
84 self.str_buf.clear();
89
90 let mut space: Option<usize> = None;
91 loop {
92 let ch = self.rdr.next_char_or_null()?;
93
94 if ch == b':' {
95 if self.str_buf.is_empty() {
96 return Err(self.rdr.error(ErrorCode::Custom(
97 "Found ':' but no key name (for an empty key name use quotes)".to_string(),
98 )));
99 } else if space.is_some() && space.unwrap() != self.str_buf.len() {
100 return Err(self.rdr.error(ErrorCode::Custom(
101 "Found whitespace in your key name (use quotes to include)".to_string(),
102 )));
103 }
104 self.rdr.uneat_char(ch);
105 let s = str::from_utf8(&self.str_buf).unwrap();
106 return visitor.visit_str(s);
107 } else if ch <= b' ' {
108 if ch == 0 {
109 return Err(self.rdr.error(ErrorCode::EOFWhileParsingObject));
110 } else if space.is_none() {
111 space = Some(self.str_buf.len());
112 }
113 } else if Self::is_punctuator_char(ch) {
114 return Err(self.rdr.error(ErrorCode::Custom("Found a punctuator where a key name was expected (check your syntax or use quotes if the key name includes {}[],: or whitespace)".to_string())));
115 } else {
116 self.str_buf.push(ch);
117 }
118 }
119 }
120
121 fn parse_value<'de, V>(&mut self, visitor: V) -> Result<V::Value>
122 where
123 V: de::Visitor<'de>,
124 {
125 self.rdr.parse_whitespace()?;
126
127 if self.rdr.eof()? {
128 return Err(self.rdr.error(ErrorCode::EOFWhileParsingValue));
129 }
130
131 match self.state {
132 State::Keyname => {
133 self.state = State::Normal;
134 return self.parse_keyname(visitor);
135 }
136 State::Root => {
137 self.state = State::Normal;
138 return self.visit_map(true, visitor);
139 }
140 State::Normal => {}
141 }
142
143 match self.rdr.peek_or_null()? {
144 b'"' => {
154 self.rdr.eat_char();
155 self.parse_string()?;
156 let s = str::from_utf8(&self.str_buf).unwrap();
157 visitor.visit_str(s)
158 }
159 b'[' => {
160 self.rdr.eat_char();
161 let ret = visitor.visit_seq(SeqVisitor::new(self))?;
162 self.rdr.parse_whitespace()?;
163 match self.rdr.next_char()? {
164 Some(b']') => Ok(ret),
165 Some(_) => Err(self.rdr.error(ErrorCode::TrailingCharacters)),
166 None => Err(self.rdr.error(ErrorCode::EOFWhileParsingList)),
167 }
168 }
169 b'{' => {
170 self.rdr.eat_char();
171 self.visit_map(false, visitor)
172 }
173 b'\x00' => Err(self.rdr.error(ErrorCode::ExpectedSomeValue)),
174 _ => self.parse_tfnns(visitor),
175 }
176 }
177
178 fn visit_map<'de, V>(&mut self, root: bool, visitor: V) -> Result<V::Value>
179 where
180 V: de::Visitor<'de>,
181 {
182 let ret = visitor.visit_map(MapVisitor::new(self, root))?;
183 self.rdr.parse_whitespace()?;
184 match self.rdr.next_char()? {
185 Some(b'}') => {
186 if !root {
187 Ok(ret)
188 } else {
189 Err(self.rdr.error(ErrorCode::TrailingCharacters))
190 } }
192 Some(_) => Err(self.rdr.error(ErrorCode::TrailingCharacters)),
193 None => {
194 if root {
195 Ok(ret)
196 } else {
197 Err(self.rdr.error(ErrorCode::EOFWhileParsingObject))
198 }
199 }
200 }
201 }
202
203 fn parse_ident(&mut self, ident: &[u8]) -> Result<()> {
204 for c in ident {
205 if Some(*c) != self.rdr.next_char()? {
206 return Err(self.rdr.error(ErrorCode::ExpectedSomeIdent));
207 }
208 }
209
210 Ok(())
211 }
212
213 fn parse_tfnns<'de, V>(&mut self, visitor: V) -> Result<V::Value>
214 where
215 V: de::Visitor<'de>,
216 {
217 self.str_buf.clear();
220
221 let first = self.rdr.peek()?.unwrap();
222
223 if Self::is_punctuator_char(first) {
224 return Err(self.rdr.error(ErrorCode::PunctuatorInQlString));
225 }
226
227 loop {
228 let ch = self.rdr.next_char_or_null()?;
229
230 let is_eol = ch == b'\r' || ch == b'\n' || ch == b'\x00';
231 let is_comment = ch == b'#'
232 || ch == b'/' && {
233 let next = self.rdr.peek_or_null()?;
234 next == b'/' || next == b'*'
235 };
236 if is_eol || is_comment || ch == b',' || ch == b'}' || ch == b']' {
237 let chf = self.str_buf[0];
238 match chf {
239 b'f' => {
240 if str::from_utf8(&self.str_buf).unwrap().trim() == "false" {
241 self.rdr.uneat_char(ch);
242 return visitor.visit_bool(false);
243 }
244 }
245 b'n' => {
246 if str::from_utf8(&self.str_buf).unwrap().trim() == "null" {
247 self.rdr.uneat_char(ch);
248 return visitor.visit_unit();
249 }
250 }
251 b't' => {
252 if str::from_utf8(&self.str_buf).unwrap().trim() == "true" {
253 self.rdr.uneat_char(ch);
254 return visitor.visit_bool(true);
255 }
256 }
257 _ => {
258 if chf == b'-' || chf.is_ascii_digit() {
259 let mut pn = ParseNumber::new(self.str_buf.iter().copied());
260 match pn.parse(false) {
261 Ok(Number::F64(v)) => {
262 self.rdr.uneat_char(ch);
263 return visitor.visit_f64(v);
264 }
265 Ok(Number::U64(v)) => {
266 self.rdr.uneat_char(ch);
267 return visitor.visit_u64(v);
268 }
269 Ok(Number::I64(v)) => {
270 self.rdr.uneat_char(ch);
271 return visitor.visit_i64(v);
272 }
273 Err(_) => {} }
275 }
276 }
277 }
278 if is_eol {
279 let pos = self.rdr.pos();
280 return visitor.visit_str(
282 str::from_utf8(&self.str_buf)
283 .map_err(|_| {
284 Error::Syntax(ErrorCode::EOFWhileParsingString, pos.0, pos.1)
285 })?
286 .trim(),
287 );
288 }
289 }
290 self.str_buf.push(ch);
291
292 if self.str_buf == b"'''" {
293 return self.parse_ml_string(visitor);
294 }
295 }
296 }
297
298 fn decode_hex_escape(&mut self) -> Result<u16> {
299 let mut i = 0;
300 let mut n = 0u16;
301 while i < 4 && !self.rdr.eof()? {
302 n = match self.rdr.next_char_or_null()? {
303 c @ b'0'..=b'9' => n * 16_u16 + ((c as u16) - (b'0' as u16)),
304 b'a' | b'A' => n * 16_u16 + 10_u16,
305 b'b' | b'B' => n * 16_u16 + 11_u16,
306 b'c' | b'C' => n * 16_u16 + 12_u16,
307 b'd' | b'D' => n * 16_u16 + 13_u16,
308 b'e' | b'E' => n * 16_u16 + 14_u16,
309 b'f' | b'F' => n * 16_u16 + 15_u16,
310 _ => {
311 return Err(self.rdr.error(ErrorCode::InvalidEscape));
312 }
313 };
314
315 i += 1;
316 }
317
318 if i != 4 {
320 return Err(self.rdr.error(ErrorCode::InvalidEscape));
321 }
322
323 Ok(n)
324 }
325
326 fn ml_skip_white(&mut self) -> Result<bool> {
327 match self.rdr.peek_or_null()? {
328 b' ' | b'\t' | b'\r' => {
329 self.rdr.eat_char();
330 Ok(true)
331 }
332 _ => Ok(false),
333 }
334 }
335
336 fn ml_skip_indent(&mut self, indent: usize) -> Result<()> {
337 let mut skip = indent;
338 while self.ml_skip_white()? && skip > 0 {
339 skip -= 1;
340 }
341 Ok(())
342 }
343
344 fn parse_ml_string<'de, V>(&mut self, visitor: V) -> Result<V::Value>
345 where
346 V: de::Visitor<'de>,
347 {
348 self.str_buf.clear();
349
350 let mut triple = 0;
352
353 let (_, col) = self.rdr.pos();
355
356 let indent = if col >= 4 { col - 4 } else { 0 };
358
359 while self.ml_skip_white()? {}
361 if self.rdr.peek_or_null()? == b'\n' {
362 self.rdr.eat_char();
363 self.ml_skip_indent(indent)?;
364 }
365
366 loop {
368 if self.rdr.eof()? {
369 return Err(self.rdr.error(ErrorCode::EOFWhileParsingString));
370 } let ch = self.rdr.next_char_or_null()?;
372
373 if ch == b'\'' {
374 triple += 1;
375 if triple == 3 {
376 if self.str_buf.last() == Some(&b'\n') {
377 self.str_buf.pop();
378 }
379 let res = str::from_utf8(&self.str_buf).unwrap();
380 return visitor.visit_str(res);
382 } else {
383 continue;
384 }
385 }
386
387 while triple > 0 {
388 self.str_buf.push(b'\'');
389 triple -= 1;
390 }
391
392 if ch != b'\r' {
393 self.str_buf.push(ch);
394 }
395 if ch == b'\n' {
396 self.ml_skip_indent(indent)?;
397 }
398 }
399 }
400
401 fn parse_string(&mut self) -> Result<()> {
402 self.str_buf.clear();
403
404 loop {
405 let Some(ch) = self.rdr.next_char()? else {
406 return Err(self.rdr.error(ErrorCode::EOFWhileParsingString));
407 };
408
409 match ch {
410 b'"' => {
411 return Ok(());
412 }
413 b'\\' => {
414 let Some(ch) = self.rdr.next_char()? else {
415 return Err(self.rdr.error(ErrorCode::EOFWhileParsingString));
416 };
417
418 match ch {
419 b'"' => self.str_buf.push(b'"'),
420 b'\\' => self.str_buf.push(b'\\'),
421 b'/' => self.str_buf.push(b'/'),
422 b'b' => self.str_buf.push(b'\x08'),
423 b'f' => self.str_buf.push(b'\x0c'),
424 b'n' => self.str_buf.push(b'\n'),
425 b'r' => self.str_buf.push(b'\r'),
426 b't' => self.str_buf.push(b'\t'),
427 b'u' => {
428 let c = match self.decode_hex_escape()? {
429 0xDC00..=0xDFFF => {
430 return Err(self
431 .rdr
432 .error(ErrorCode::LoneLeadingSurrogateInHexEscape));
433 }
434
435 n1 @ 0xD800..=0xDBFF => {
438 match (self.rdr.next_char()?, self.rdr.next_char()?) {
439 (Some(b'\\'), Some(b'u')) => (),
440 _ => {
441 return Err(self
442 .rdr
443 .error(ErrorCode::UnexpectedEndOfHexEscape));
444 }
445 }
446
447 let n2 = self.decode_hex_escape()?;
448
449 if !(0xDC00..=0xDFFF).contains(&n2) {
450 return Err(self
451 .rdr
452 .error(ErrorCode::LoneLeadingSurrogateInHexEscape));
453 }
454
455 let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32)
456 + 0x1_0000;
457
458 match char::from_u32(n) {
459 Some(c) => c,
460 None => {
461 return Err(self
462 .rdr
463 .error(ErrorCode::InvalidUnicodeCodePoint));
464 }
465 }
466 }
467
468 n => match char::from_u32(n as u32) {
469 Some(c) => c,
470 None => {
471 return Err(self
472 .rdr
473 .error(ErrorCode::InvalidUnicodeCodePoint));
474 }
475 },
476 };
477
478 self.str_buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes());
479 }
480 _ => {
481 return Err(self.rdr.error(ErrorCode::InvalidEscape));
482 }
483 }
484 }
485 ch => {
486 self.str_buf.push(ch);
487 }
488 }
489 }
490 }
491
492 fn parse_object_colon(&mut self) -> Result<()> {
493 self.rdr.parse_whitespace()?;
494
495 match self.rdr.next_char()? {
496 Some(b':') => Ok(()),
497 Some(_) => Err(self.rdr.error(ErrorCode::ExpectedColon)),
498 None => Err(self.rdr.error(ErrorCode::EOFWhileParsingObject)),
499 }
500 }
501}
502
503impl<'de, 'a, Iter> de::Deserializer<'de> for &'a mut Deserializer<Iter>
504where
505 Iter: Iterator<Item = u8>,
506{
507 type Error = Error;
508
509 #[inline]
510 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
511 where
512 V: de::Visitor<'de>,
513 {
514 if let State::Root = self.state {}
515 self.parse_value(visitor)
516 }
517
518 #[inline]
520 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
521 where
522 V: de::Visitor<'de>,
523 {
524 self.rdr.parse_whitespace()?;
525
526 match self.rdr.peek_or_null()? {
527 b'n' => {
528 self.rdr.eat_char();
529 self.parse_ident(b"ull")?;
530 visitor.visit_none()
531 }
532 _ => visitor.visit_some(self),
533 }
534 }
535
536 #[inline]
538 fn deserialize_newtype_struct<V>(self, _name: &str, visitor: V) -> Result<V::Value>
539 where
540 V: de::Visitor<'de>,
541 {
542 visitor.visit_newtype_struct(self)
543 }
544
545 serde::forward_to_deserialize_any! {
546 bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
547 bytes byte_buf unit unit_struct seq tuple map
548 tuple_struct struct enum identifier ignored_any
549 }
550}
551
552struct SeqVisitor<'a, Iter: 'a + Iterator<Item = u8>> {
553 de: &'a mut Deserializer<Iter>,
554}
555
556impl<'a, Iter: Iterator<Item = u8>> SeqVisitor<'a, Iter> {
557 fn new(de: &'a mut Deserializer<Iter>) -> Self {
558 Self { de }
559 }
560}
561
562impl<'de, 'a, Iter> de::SeqAccess<'de> for SeqVisitor<'a, Iter>
563where
564 Iter: Iterator<Item = u8>,
565{
566 type Error = Error;
567
568 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
569 where
570 T: de::DeserializeSeed<'de>,
571 {
572 self.de.rdr.parse_whitespace()?;
573
574 match self.de.rdr.peek()? {
575 Some(b']') => {
576 return Ok(None);
577 }
578 Some(_) => {}
579 None => {
580 return Err(self.de.rdr.error(ErrorCode::EOFWhileParsingList));
581 }
582 }
583
584 let value = seed.deserialize(&mut *self.de)?;
585
586 self.de.rdr.parse_whitespace()?;
588 if self.de.rdr.peek()? == Some(b',') {
589 self.de.rdr.eat_char();
590 self.de.rdr.parse_whitespace()?;
591 }
592
593 Ok(Some(value))
594 }
595}
596
597struct MapVisitor<'a, Iter: 'a + Iterator<Item = u8>> {
598 de: &'a mut Deserializer<Iter>,
599 first: bool,
600 root: bool,
601}
602
603impl<'a, Iter: Iterator<Item = u8>> MapVisitor<'a, Iter> {
604 fn new(de: &'a mut Deserializer<Iter>, root: bool) -> Self {
605 Self {
606 de,
607 first: true,
608 root,
609 }
610 }
611}
612
613impl<'de, 'a, Iter> de::MapAccess<'de> for MapVisitor<'a, Iter>
614where
615 Iter: Iterator<Item = u8>,
616{
617 type Error = Error;
618
619 fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
620 where
621 K: de::DeserializeSeed<'de>,
622 {
623 self.de.rdr.parse_whitespace()?;
624
625 if self.first {
626 self.first = false;
627 } else if self.de.rdr.peek()? == Some(b',') {
628 self.de.rdr.eat_char();
630 self.de.rdr.parse_whitespace()?;
631 }
632
633 match self.de.rdr.peek()? {
634 Some(b'}') => return Ok(None), Some(_) => {}
636 None => {
637 if self.root {
638 return Ok(None);
639 } else {
640 return Err(self.de.rdr.error(ErrorCode::EOFWhileParsingObject));
641 }
642 }
643 }
644
645 match self.de.rdr.peek()? {
646 Some(ch) => {
647 self.de.state = if ch == b'"' {
648 State::Normal
649 } else {
650 State::Keyname
651 };
652 Ok(Some(seed.deserialize(&mut *self.de)?))
653 }
654 None => Err(self.de.rdr.error(ErrorCode::EOFWhileParsingValue)),
655 }
656 }
657
658 fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
659 where
660 V: de::DeserializeSeed<'de>,
661 {
662 self.de.parse_object_colon()?;
663
664 seed.deserialize(&mut *self.de)
665 }
666}
667
668impl<'de, 'a, Iter> de::VariantAccess<'de> for &'a mut Deserializer<Iter>
669where
670 Iter: Iterator<Item = u8>,
671{
672 type Error = Error;
673
674 fn unit_variant(self) -> Result<()> {
675 de::Deserialize::deserialize(self)
676 }
677
678 fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
679 where
680 T: de::DeserializeSeed<'de>,
681 {
682 seed.deserialize(self)
683 }
684
685 fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
686 where
687 V: de::Visitor<'de>,
688 {
689 de::Deserializer::deserialize_any(self, visitor)
690 }
691
692 fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
693 where
694 V: de::Visitor<'de>,
695 {
696 de::Deserializer::deserialize_any(self, visitor)
697 }
698}
699
700pub struct StreamDeserializer<T, Iter>
704where
705 Iter: Iterator<Item = u8>,
706 T: de::DeserializeOwned,
707{
708 deser: Deserializer<Iter>,
709 _marker: PhantomData<T>,
710}
711
712impl<T, Iter> StreamDeserializer<T, Iter>
713where
714 Iter: Iterator<Item = u8>,
715 T: de::DeserializeOwned,
716{
717 pub fn new(iter: Iter) -> StreamDeserializer<T, Iter> {
720 StreamDeserializer {
721 deser: Deserializer::new(iter),
722 _marker: PhantomData,
723 }
724 }
725}
726
727impl<T, Iter> Iterator for StreamDeserializer<T, Iter>
728where
729 Iter: Iterator<Item = u8>,
730 T: de::DeserializeOwned,
731{
732 type Item = Result<T>;
733
734 fn next(&mut self) -> Option<Result<T>> {
735 if let Err(e) = self.deser.rdr.parse_whitespace() {
739 return Some(Err(e));
740 };
741
742 match self.deser.rdr.eof() {
743 Ok(true) => None,
744 Ok(false) => match de::Deserialize::deserialize(&mut self.deser) {
745 Ok(v) => Some(Ok(v)),
746 Err(e) => Some(Err(e)),
747 },
748 Err(e) => Some(Err(e)),
749 }
750 }
751}
752
753pub fn from_iter<I, T>(iter: I) -> Result<T>
758where
759 I: Iterator<Item = io::Result<u8>>,
760 T: de::DeserializeOwned,
761{
762 let fold: io::Result<Vec<_>> = iter.collect();
763 let bytes = fold.map_err(Error::Io)?;
764
765 let mut de = Deserializer::new_for_root(bytes.iter().copied());
771 de::Deserialize::deserialize(&mut de)
772 .and_then(|x| de.end().map(|()| x))
773 .or_else(|_| {
774 let mut de2 = Deserializer::new(bytes.iter().copied());
775 de::Deserialize::deserialize(&mut de2).and_then(|x| de2.end().map(|()| x))
776 })
777
778 }
789
790pub fn from_reader<R, T>(rdr: R) -> Result<T>
792where
793 R: io::Read,
794 T: de::DeserializeOwned,
795{
796 from_iter(rdr.bytes())
797}
798
799pub fn from_slice<T>(v: &[u8]) -> Result<T>
801where
802 T: de::DeserializeOwned,
803{
804 from_iter(v.iter().map(|byte| Ok(*byte)))
805}
806
807pub fn from_str<T>(s: &str) -> Result<T>
809where
810 T: de::DeserializeOwned,
811{
812 if s.chars().last().map_or(false, char::is_whitespace) {
813 from_slice(s.as_bytes())
814 } else {
815 let s = format!("{s}\n");
816 from_slice(s.as_bytes())
817 }
818}