1use byteorder::{BigEndian, ByteOrder, LittleEndian};
14use iter_read::{IterRead, IterReadItem};
15use num_bigint::{BigInt, Sign};
16use num_traits::ToPrimitive;
17use serde::de::Visitor;
18use serde::{de, forward_to_deserialize_any};
19use std::char;
20use std::collections::BTreeMap;
21use std::io;
22use std::io::{BufRead, BufReader, Read};
23use std::iter::FusedIterator;
24use std::mem;
25use std::str;
26use std::str::FromStr;
27use std::vec;
28
29use super::consts::*;
30use super::error::{Error, ErrorCode, Result};
31use super::value;
32
/// Key type of the deserializer's memo table.
type MemoId = u32;
34
/// Global (module-level) names the decoder distinguishes when it meets a
/// GLOBAL / STACK_GLOBAL opcode. Anything it does not recognize is `Other`.
#[derive(Clone, Debug, PartialEq)]
enum Global {
    /// The builtin `set`.
    Set,
    /// The builtin `frozenset`.
    Frozenset,
    /// The builtin `bytearray`.
    Bytearray,
    /// The builtin `list`.
    List,
    /// The builtin `int`.
    Int,
    /// `_codecs.encode`.
    Encode,
    /// Any other, unrecognized global.
    Other,
}
45
/// Intermediate value produced while interpreting the opcode stream.
///
/// Unlike the public `value::Value`, this type can hold memo references and
/// unresolved globals, and keeps containers as plain vectors.
#[derive(Clone, Debug, PartialEq)]
enum Value {
    /// Reference to an entry of the memo table.
    MemoRef(MemoId),
    /// A global reference, classified by `decode_global`.
    Global(Global),
    None,
    Bool(bool),
    /// Integer that fits in 64 bits.
    I64(i64),
    /// Arbitrary-precision integer.
    Int(BigInt),
    F64(f64),
    Bytes(Vec<u8>),
    String(String),
    List(Vec<Value>),
    Tuple(Vec<Value>),
    Set(Vec<Value>),
    FrozenSet(Vec<Value>),
    /// Dictionary kept as key/value pairs in insertion order.
    Dict(Vec<(Value, Value)>),
    /// Persistent id wrapper (binary form).
    BinPersId(Box<Value>),
    /// Persistent id wrapper (text form).
    PersId(Box<Value>),
}
74
/// Options that tune how pickle data is decoded.
///
/// Both flags start out `false`; each builder method consumes the options and
/// returns a copy with one flag switched on, so calls can be chained.
#[derive(Clone, Debug, Default)]
pub struct DeOptions {
    /// When set, `decode_string` decodes byte strings as UTF-8 text.
    decode_strings: bool,
    /// When set, unresolved globals deserialize as a unit value instead of
    /// raising `UnresolvedGlobal`.
    replace_unresolved_globals: bool,
}

impl DeOptions {
    /// Returns the default options (every flag disabled).
    pub fn new() -> Self {
        Self::default()
    }

    /// Enables decoding of byte strings as UTF-8 text.
    pub fn decode_strings(self) -> Self {
        Self {
            decode_strings: true,
            ..self
        }
    }

    /// Enables replacing unresolved globals with a unit value.
    pub fn replace_unresolved_globals(self) -> Self {
        Self {
            replace_unresolved_globals: true,
            ..self
        }
    }
}
103
/// Decoder state for reading pickle data from a reader.
pub struct Deserializer<R: Read> {
    /// Buffered input source.
    rdr: BufReader<R>,
    /// Decoding options.
    options: DeOptions,
    /// Count of bytes consumed by the `read_*` helpers; attached to `Error::Eval`.
    pos: usize,
    /// Value to hand out on the next `get_next_value` call instead of parsing input.
    value: Option<Value>,
    /// Memo table: each entry stores the memoized value and a reference count.
    memo: BTreeMap<MemoId, (Value, i32)>,
    /// Current value stack.
    stack: Vec<Value>,
    /// Stacks set aside by MARK and restored by `pop_mark`.
    stacks: Vec<Vec<Value>>,
}
114
115impl<R: Read> Deserializer<R> {
116 pub fn new(rdr: R, options: DeOptions) -> Deserializer<R> {
118 Deserializer {
119 rdr: BufReader::new(rdr),
120 pos: 0,
121 value: None,
122 memo: BTreeMap::new(),
123 stack: Vec::with_capacity(128),
124 stacks: Vec::with_capacity(16),
125 options,
126 }
127 }
128
    /// Removes every entry from the memo table.
    pub fn reset_memo(&mut self) {
        self.memo.clear();
    }
181
182 pub fn deserialize_value(&mut self) -> Result<value::Value> {
186 let internal_value = self.parse_value()?;
187 self.convert_value(internal_value)
188 }
189
190 fn get_next_value(&mut self) -> Result<Value> {
193 match self.value.take() {
194 Some(v) => Ok(v),
195 None => self.parse_value(),
196 }
197 }
198
199 fn parse_value(&mut self) -> Result<Value> {
202 loop {
203 let byte = self.read_byte()?;
204 println!("byte: {}", byte);
205 match byte {
206 PROTO => {
208 self.read_byte()?;
211 }
212 FRAME => {
213 self.read_fixed_8_bytes()?;
215 }
216 STOP => return self.pop(),
217 MARK => {
218 let stack = mem::replace(&mut self.stack, Vec::with_capacity(128));
219 self.stacks.push(stack);
220 }
221 POP => {
222 if self.stack.is_empty() {
223 self.pop_mark()?;
224 } else {
225 self.pop()?;
226 }
227 }
228 POP_MARK => {
229 self.pop_mark()?;
230 }
231 DUP => {
232 let top = self.top()?.clone();
233 self.stack.push(top);
234 }
235
236 PUT => {
238 let bytes = self.read_line()?;
239 let memo_id = self.parse_ascii(bytes)?;
240 self.memoize(memo_id)?;
241 }
242 BINPUT => {
243 let memo_id = self.read_byte()?;
244 self.memoize(memo_id.into())?;
245 }
246 LONG_BINPUT => {
247 let bytes = self.read_fixed_4_bytes()?;
248 let memo_id = LittleEndian::read_u32(&bytes);
249 self.memoize(memo_id)?;
250 }
251 MEMOIZE => {
252 let memo_id = self.memo.len();
253 self.memoize(memo_id as MemoId)?;
254 }
255
256 GET => {
258 let bytes = self.read_line()?;
259 let memo_id = self.parse_ascii(bytes)?;
260 self.push_memo_ref(memo_id)?;
261 }
262 BINGET => {
263 let memo_id = self.read_byte()?;
264 self.push_memo_ref(memo_id.into())?;
265 }
266 LONG_BINGET => {
267 let bytes = self.read_fixed_4_bytes()?;
268 let memo_id = LittleEndian::read_u32(&bytes);
269 self.push_memo_ref(memo_id)?;
270 }
271
272 NONE => self.stack.push(Value::None),
274 NEWFALSE => self.stack.push(Value::Bool(false)),
275 NEWTRUE => self.stack.push(Value::Bool(true)),
276
277 INT => {
279 let line = self.read_line()?;
280 let val = self.decode_text_int(line)?;
281 self.stack.push(val);
282 }
283 LONG => {
284 let line = self.read_line()?;
285 let long = self.decode_text_long(line)?;
286 self.stack.push(long);
287 }
288 FLOAT => {
289 let line = self.read_line()?;
290 let f = self.parse_ascii(line)?;
291 self.stack.push(Value::F64(f));
292 }
293
294 STRING => {
296 let line = self.read_line()?;
297 let string = self.decode_escaped_string(&line)?;
298 self.stack.push(string);
299 }
300 UNICODE => {
301 let line = self.read_line()?;
302 let string = self.decode_escaped_unicode(&line)?;
303 self.stack.push(string);
304 }
305
306 BINFLOAT => {
308 let bytes = self.read_fixed_8_bytes()?;
309 self.stack.push(Value::F64(BigEndian::read_f64(&bytes)));
310 }
311 BININT => {
312 let bytes = self.read_fixed_4_bytes()?;
313 self.stack
314 .push(Value::I64(LittleEndian::read_i32(&bytes).into()));
315 }
316 BININT1 => {
317 let byte = self.read_byte()?;
318 self.stack.push(Value::I64(byte.into()));
319 }
320 BININT2 => {
321 let bytes = self.read_fixed_2_bytes()?;
322 self.stack
323 .push(Value::I64(LittleEndian::read_u16(&bytes).into()));
324 }
325 LONG1 => {
326 let bytes = self.read_u8_prefixed_bytes()?;
327 let long = self.decode_binary_long(bytes);
328 self.stack.push(long);
329 }
330 LONG4 => {
331 let bytes = self.read_i32_prefixed_bytes()?;
332 let long = self.decode_binary_long(bytes);
333 self.stack.push(long);
334 }
335
336 SHORT_BINBYTES => {
338 let string = self.read_u8_prefixed_bytes()?;
339 self.stack.push(Value::Bytes(string));
340 }
341 BINBYTES => {
342 let string = self.read_u32_prefixed_bytes()?;
343 self.stack.push(Value::Bytes(string));
344 }
345 BINBYTES8 => {
346 let string = self.read_u64_prefixed_bytes()?;
347 self.stack.push(Value::Bytes(string));
348 }
349 SHORT_BINSTRING => {
350 let string = self.read_u8_prefixed_bytes()?;
351 let decoded = self.decode_string(string)?;
352 self.stack.push(decoded);
353 }
354 BINSTRING => {
355 let string = self.read_i32_prefixed_bytes()?;
356 let decoded = self.decode_string(string)?;
357 self.stack.push(decoded);
358 }
359 SHORT_BINUNICODE => {
360 let string = self.read_u8_prefixed_bytes()?;
361 let decoded = self.decode_unicode(string)?;
362 self.stack.push(decoded);
363 }
364 BINUNICODE => {
365 let string = self.read_u32_prefixed_bytes()?;
366 let decoded = self.decode_unicode(string)?;
367 self.stack.push(decoded);
368 }
369 BINUNICODE8 => {
370 let string = self.read_u64_prefixed_bytes()?;
371 let decoded = self.decode_unicode(string)?;
372 self.stack.push(decoded);
373 }
374 BYTEARRAY8 => {
375 let string = self.read_u64_prefixed_bytes()?;
376 self.stack.push(Value::Bytes(string));
377 }
378
379 EMPTY_TUPLE => self.stack.push(Value::Tuple(Vec::new())),
381 TUPLE1 => {
382 let item = self.pop()?;
383 self.stack.push(Value::Tuple(vec![item]));
384 }
385 TUPLE2 => {
386 let item2 = self.pop()?;
387 let item1 = self.pop()?;
388 self.stack.push(Value::Tuple(vec![item1, item2]));
389 }
390 TUPLE3 => {
391 let item3 = self.pop()?;
392 let item2 = self.pop()?;
393 let item1 = self.pop()?;
394 self.stack.push(Value::Tuple(vec![item1, item2, item3]));
395 }
396 TUPLE => {
397 let items = self.pop_mark()?;
398 self.stack.push(Value::Tuple(items));
399 }
400
401 EMPTY_LIST => self.stack.push(Value::List(Vec::new())),
403 LIST => {
404 let items = self.pop_mark()?;
405 self.stack.push(Value::List(items));
406 }
407 APPEND => {
408 let value = self.pop()?;
409 self.modify_list(|list| list.push(value))?;
410 }
411 APPENDS => {
412 let items = self.pop_mark()?;
413 self.modify_list(|list| list.extend(items))?;
414 }
415
416 EMPTY_DICT => self.stack.push(Value::Dict(Vec::new())),
418 DICT => {
419 let items = self.pop_mark()?;
420 let mut dict = Vec::with_capacity(items.len() / 2);
421 Self::extend_dict(&mut dict, items);
422 self.stack.push(Value::Dict(dict));
423 }
424 SETITEM => {
425 let value = self.pop()?;
426 let key = self.pop()?;
427 self.modify_dict(|dict| dict.push((key, value)))?;
428 }
429 SETITEMS => {
430 let items = self.pop_mark()?;
431 self.modify_dict(|dict| Self::extend_dict(dict, items))?;
432 }
433
434 EMPTY_SET => self.stack.push(Value::Set(Vec::new())),
436 FROZENSET => {
437 let items = self.pop_mark()?;
438 self.stack.push(Value::FrozenSet(items));
439 }
440 ADDITEMS => {
441 let items = self.pop_mark()?;
442 self.modify_set(|set| set.extend(items))?;
443 }
444
445 GLOBAL => {
448 let modname = self.read_line()?;
449 let globname = self.read_line()?;
450 let value = self.decode_global(modname, globname)?;
451 self.stack.push(value);
452 }
453 STACK_GLOBAL => {
454 let globname = match self.pop_resolve()? {
455 Value::String(string) => string.into_bytes(),
456 other => return Self::stack_error("string", &other, self.pos),
457 };
458 let modname = match self.pop_resolve()? {
459 Value::String(string) => string.into_bytes(),
460 other => return Self::stack_error("string", &other, self.pos),
461 };
462 let value = self.decode_global(modname, globname)?;
463 self.stack.push(value);
464 }
465 REDUCE => {
466 let argtuple = match self.pop_resolve()? {
467 Value::Tuple(args) => args,
468 other => return Self::stack_error("tuple", &other, self.pos),
469 };
470 let global = self.pop_resolve()?;
471 self.reduce_global(global, argtuple)?;
472 }
473
474 INST => {
476 for _ in 0..2 {
478 self.read_line()?;
479 }
480 self.pop_mark()?;
482 self.stack.push(Value::Dict(Vec::new()));
484 }
485 OBJ => {
486 self.pop_mark()?;
488 self.pop()?;
490 self.stack.push(Value::Dict(Vec::new()));
491 }
492 NEWOBJ => {
493 for _ in 0..2 {
495 self.pop()?;
496 }
497 self.stack.push(Value::Dict(Vec::new()));
498 }
499 NEWOBJ_EX => {
500 for _ in 0..3 {
502 self.pop()?;
503 }
504 self.stack.push(Value::Dict(Vec::new()));
505 }
506 BUILD => {
507 let state = self.pop()?;
511 self.pop()?; self.stack.push(state);
513 }
514
515 PERSID => {
516 let line = self.read_line()?;
517 println!("PERSID: {:?}", line);
518 let bytes = Value::Bytes(line);
519 self.stack.push(Value::BinPersId(Box::new(bytes)));
520 }
521
522 BINPERSID => {
523 let binpers_id = self.pop()?;
524 self.stack.push(Value::BinPersId(Box::new(binpers_id)));
525 }
526
527 code => return self.error(ErrorCode::Unsupported(code as char)),
529 }
530 }
531 }
532
533 fn pop(&mut self) -> Result<Value> {
535 match self.stack.pop() {
536 Some(v) => Ok(v),
537 None => self.error(ErrorCode::StackUnderflow),
538 }
539 }
540
541 fn pop_resolve(&mut self) -> Result<Value> {
543 let top = self.stack.pop();
544 match self.resolve(top) {
545 Some(v) => Ok(v),
546 None => self.error(ErrorCode::StackUnderflow),
547 }
548 }
549
550 fn pop_mark(&mut self) -> Result<Vec<Value>> {
552 match self.stacks.pop() {
553 Some(new) => Ok(mem::replace(&mut self.stack, new)),
554 None => self.error(ErrorCode::StackUnderflow),
555 }
556 }
557
    /// Returns a mutable reference to the value at the top of the current stack.
    ///
    /// When the top is a memo reference, the reference returned points at the
    /// memoized value inside the memo table, so changes made through it land
    /// in the memo entry.
    ///
    /// Fails with `MissingMemo` if the referenced memo entry does not exist and
    /// with `StackUnderflow` if the stack is empty.
    fn top(&mut self) -> Result<&mut Value> {
        match self.stack.last_mut() {
            // Follow the reference into the memo table.
            Some(&mut Value::MemoRef(n)) => self
                .memo
                .get_mut(&n)
                .map(|&mut (ref mut v, _)| v)
                .ok_or_else(|| Error::Syntax(ErrorCode::MissingMemo(n))),
            Some(other_value) => Ok(other_value),
            None => Err(Error::Eval(ErrorCode::StackUnderflow, self.pos)),
        }
    }
573
574 fn push_memo_ref(&mut self, memo_id: MemoId) -> Result<()> {
576 self.stack.push(Value::MemoRef(memo_id));
577 match self.memo.get_mut(&memo_id) {
578 Some(&mut (_, ref mut count)) => {
579 *count += 1;
580 Ok(())
581 }
582 None => Err(Error::Eval(ErrorCode::MissingMemo(memo_id), self.pos)),
583 }
584 }
585
586 fn memoize(&mut self, memo_id: MemoId) -> Result<()> {
589 let mut item = self.pop()?;
590 if let Value::MemoRef(id) = item {
591 item = match self.memo.get(&id) {
593 Some((v, _)) => v.clone(),
594 None => return Err(Error::Eval(ErrorCode::MissingMemo(id), self.pos)),
595 };
596 }
597 self.memo.insert(memo_id, (item, 1));
598 self.stack.push(Value::MemoRef(memo_id));
599 Ok(())
600 }
601
602 fn resolve(&mut self, maybe_memo: Option<Value>) -> Option<Value> {
604 match maybe_memo {
605 Some(Value::MemoRef(id)) => {
606 self.memo.get_mut(&id).map(|&mut (ref val, ref mut count)| {
607 *count -= 1;
611 val.clone()
612 })
613 }
614 other => other,
615 }
616 }
617
618 fn resolve_recursive<T, U, F>(&mut self, id: MemoId, u: U, f: F) -> Result<T>
620 where
621 F: FnOnce(&mut Self, U, Value) -> Result<T>,
622 {
623 let (value, mut count) = match self.memo.remove(&id) {
627 Some(entry) => entry,
628 None => return Err(Error::Syntax(ErrorCode::Recursive)),
629 };
630 count -= 1;
631 if count <= 0 {
632 f(self, u, value)
633 } else {
635 let result = f(self, u, value.clone());
636 self.memo.insert(id, (value, count));
637 result
638 }
639 }
640
641 pub fn end(&mut self) -> Result<()> {
643 let mut buf = [0];
644 match self.rdr.read(&mut buf) {
645 Err(err) => Err(Error::Io(err)),
646 Ok(1) => self.error(ErrorCode::TrailingBytes),
647 _ => Ok(()),
648 }
649 }
650
651 fn read_line(&mut self) -> Result<Vec<u8>> {
652 let mut buf = Vec::with_capacity(16);
653 match self.rdr.read_until(b'\n', &mut buf) {
654 Ok(_) => {
655 self.pos += buf.len();
656 buf.pop(); if buf.last() == Some(&b'\r') {
658 buf.pop();
659 }
660 Ok(buf)
661 }
662 Err(err) => Err(Error::Io(err)),
663 }
664 }
665
666 #[inline]
667 fn read_byte(&mut self) -> Result<u8> {
668 let mut buf = [0];
669 match self.rdr.read(&mut buf) {
670 Ok(1) => {
671 self.pos += 1;
672 Ok(buf[0])
673 }
674 Ok(_) => self.error(ErrorCode::EOFWhileParsing),
675 Err(err) => Err(Error::Io(err)),
676 }
677 }
678
679 #[inline]
680 fn read_bytes(&mut self, n: usize) -> Result<Vec<u8>> {
681 let mut buf = Vec::new();
682 match self.rdr.by_ref().take(n as u64).read_to_end(&mut buf) {
683 Ok(m) if n == m => {
684 self.pos += n;
685 Ok(buf)
686 }
687 Ok(_) => self.error(ErrorCode::EOFWhileParsing),
688 Err(err) => Err(Error::Io(err)),
689 }
690 }
691
692 #[inline]
693 fn read_fixed_2_bytes(&mut self) -> Result<[u8; 2]> {
694 let mut buf = [0; 2];
695 match self.rdr.by_ref().take(2).read_exact(&mut buf) {
696 Ok(()) => {
697 self.pos += 2;
698 Ok(buf)
699 }
700 Err(err) => {
701 if err.kind() == std::io::ErrorKind::UnexpectedEof {
702 self.error(ErrorCode::EOFWhileParsing)
703 } else {
704 Err(Error::Io(err))
705 }
706 }
707 }
708 }
709
710 #[inline]
711 fn read_fixed_4_bytes(&mut self) -> Result<[u8; 4]> {
712 let mut buf = [0; 4];
713 match self.rdr.by_ref().take(4).read_exact(&mut buf) {
714 Ok(()) => {
715 self.pos += 4;
716 Ok(buf)
717 }
718 Err(err) => {
719 if err.kind() == std::io::ErrorKind::UnexpectedEof {
720 self.error(ErrorCode::EOFWhileParsing)
721 } else {
722 Err(Error::Io(err))
723 }
724 }
725 }
726 }
727
728 #[inline]
729 fn read_fixed_8_bytes(&mut self) -> Result<[u8; 8]> {
730 let mut buf = [0; 8];
731 match self.rdr.by_ref().take(8).read_exact(&mut buf) {
732 Ok(()) => {
733 self.pos += 8;
734 Ok(buf)
735 }
736 Err(err) => {
737 if err.kind() == std::io::ErrorKind::UnexpectedEof {
738 self.error(ErrorCode::EOFWhileParsing)
739 } else {
740 Err(Error::Io(err))
741 }
742 }
743 }
744 }
745
746 fn read_i32_prefixed_bytes(&mut self) -> Result<Vec<u8>> {
747 let lenbytes = self.read_fixed_4_bytes()?;
748 match LittleEndian::read_i32(&lenbytes) {
749 0 => Ok(vec![]),
750 l if l < 0 => self.error(ErrorCode::NegativeLength),
751 l => self.read_bytes(l as usize),
752 }
753 }
754
755 fn read_u64_prefixed_bytes(&mut self) -> Result<Vec<u8>> {
756 let lenbytes = self.read_fixed_8_bytes()?;
757 self.read_bytes(LittleEndian::read_u64(&lenbytes) as usize)
758 }
759
760 fn read_u32_prefixed_bytes(&mut self) -> Result<Vec<u8>> {
761 let lenbytes = self.read_fixed_4_bytes()?;
762 self.read_bytes(LittleEndian::read_u32(&lenbytes) as usize)
763 }
764
765 fn read_u8_prefixed_bytes(&mut self) -> Result<Vec<u8>> {
766 let lenbyte = self.read_byte()?;
767 self.read_bytes(lenbyte as usize)
768 }
769
770 fn parse_ascii<T: FromStr>(&self, bytes: Vec<u8>) -> Result<T> {
772 match str::from_utf8(&bytes).unwrap_or("").parse() {
773 Ok(v) => Ok(v),
774 Err(_) => self.error(ErrorCode::InvalidLiteral(bytes)),
775 }
776 }
777
778 fn decode_text_int(&self, line: Vec<u8>) -> Result<Value> {
780 Ok(if line == b"00" {
782 Value::Bool(false)
783 } else if line == b"01" {
784 Value::Bool(true)
785 } else {
786 let i = self.parse_ascii(line)?;
787 Value::I64(i)
788 })
789 }
790
791 fn decode_text_long(&self, mut line: Vec<u8>) -> Result<Value> {
793 if line.last() == Some(&b'L') {
795 line.pop();
796 }
797 match BigInt::parse_bytes(&line, 10) {
798 Some(i) => Ok(Value::Int(i)),
799 None => self.error(ErrorCode::InvalidLiteral(line)),
800 }
801 }
802
803 fn decode_escaped_string(&self, slice: &[u8]) -> Result<Value> {
806 let slice = if (slice.len() >= 2)
808 && (slice[0] == slice[slice.len() - 1])
809 && (slice[0] == b'"' || slice[0] == b'\'')
810 {
811 &slice[1..slice.len() - 1]
812 } else {
813 slice
814 };
815 let mut result = Vec::with_capacity(slice.len());
816 let mut iter = slice.iter();
817 while let Some(&b) = iter.next() {
818 match b {
819 b'\\' => match iter.next() {
820 Some(&b'\\') => result.push(b'\\'),
821 Some(&b'a') => result.push(b'\x07'),
822 Some(&b'b') => result.push(b'\x08'),
823 Some(&b't') => result.push(b'\x09'),
824 Some(&b'n') => result.push(b'\x0a'),
825 Some(&b'v') => result.push(b'\x0b'),
826 Some(&b'f') => result.push(b'\x0c'),
827 Some(&b'r') => result.push(b'\x0d'),
828 Some(&b'x') => {
829 match iter
830 .next()
831 .and_then(|&ch1| (ch1 as char).to_digit(16))
832 .and_then(|v1| {
833 iter.next()
834 .and_then(|&ch2| (ch2 as char).to_digit(16))
835 .map(|v2| 16 * (v1 as u8) + (v2 as u8))
836 }) {
837 Some(v) => result.push(v),
838 None => return self.error(ErrorCode::InvalidLiteral(slice.into())),
839 }
840 }
841 _ => return self.error(ErrorCode::InvalidLiteral(slice.into())),
842 },
843 _ => result.push(b),
844 }
845 }
846 self.decode_string(result)
847 }
848
849 fn decode_escaped_unicode(&self, s: &[u8]) -> Result<Value> {
853 let mut result = String::with_capacity(s.len());
854 let mut iter = s.iter();
855 while let Some(&b) = iter.next() {
856 match b {
857 b'\\' => {
858 let nescape = match iter.next() {
859 Some(&b'u') => 4,
860 Some(&b'U') => 8,
861 _ => return self.error(ErrorCode::InvalidLiteral(s.into())),
862 };
863 let mut accum = 0;
864 for _i in 0..nescape {
865 accum *= 16;
866 match iter.next().and_then(|&ch| (ch as char).to_digit(16)) {
867 Some(v) => accum += v,
868 None => return self.error(ErrorCode::InvalidLiteral(s.into())),
869 }
870 }
871 match char::from_u32(accum) {
872 Some(v) => result.push(v),
873 None => return self.error(ErrorCode::InvalidLiteral(s.into())),
874 }
875 }
876 _ => result.push(b as char),
877 }
878 }
879 Ok(Value::String(result))
880 }
881
882 fn decode_string(&self, string: Vec<u8>) -> Result<Value> {
884 if self.options.decode_strings {
885 self.decode_unicode(string)
886 } else {
887 Ok(Value::Bytes(string))
888 }
889 }
890
891 fn decode_unicode(&self, string: Vec<u8>) -> Result<Value> {
893 match String::from_utf8(string) {
894 Ok(v) => Ok(Value::String(v)),
895 Err(_) => self.error(ErrorCode::StringNotUTF8),
896 }
897 }
898
899 fn decode_binary_long(&self, bytes: Vec<u8>) -> Value {
901 let negative = !bytes.is_empty() && (bytes[bytes.len() - 1] & 0x80 != 0);
904 let mut val = BigInt::from_bytes_le(Sign::Plus, &bytes);
905 if negative {
906 val -= BigInt::from(1) << (bytes.len() * 8);
907 }
908 Value::Int(val)
909 }
910
911 fn modify_list<F>(&mut self, f: F) -> Result<()>
913 where
914 F: FnOnce(&mut Vec<Value>),
915 {
916 let pos = self.pos;
917 let top = self.top()?;
918 if let Value::List(ref mut list) = *top {
919 f(list);
920 Ok(())
921 } else {
922 Self::stack_error("list", top, pos)
923 }
924 }
925
926 fn extend_dict(dict: &mut Vec<(Value, Value)>, items: Vec<Value>) {
928 let mut key = None;
929 for value in items {
930 match key.take() {
931 None => key = Some(value),
932 Some(key) => dict.push((key, value)),
933 }
934 }
935 }
936
937 fn modify_dict<F>(&mut self, f: F) -> Result<()>
939 where
940 F: FnOnce(&mut Vec<(Value, Value)>),
941 {
942 let pos = self.pos;
943 let top = self.top()?;
944 if let Value::Dict(ref mut dict) = *top {
945 f(dict);
946 Ok(())
947 } else {
948 Self::stack_error("dict", top, pos)
949 }
950 }
951
952 fn modify_set<F>(&mut self, f: F) -> Result<()>
954 where
955 F: FnOnce(&mut Vec<Value>),
956 {
957 let pos = self.pos;
958 let top = self.top()?;
959 if let Value::Set(ref mut set) = *top {
960 f(set);
961 Ok(())
962 } else {
963 Self::stack_error("set", top, pos)
964 }
965 }
966
967 fn decode_global(&mut self, modname: Vec<u8>, globname: Vec<u8>) -> Result<Value> {
969 let value = match (&*modname, &*globname) {
970 (b"_codecs", b"encode") => Value::Global(Global::Encode),
971 (b"__builtin__", b"set") | (b"builtins", b"set") => Value::Global(Global::Set),
972 (b"__builtin__", b"frozenset") | (b"builtins", b"frozenset") => {
973 Value::Global(Global::Frozenset)
974 }
975 (b"__builtin__", b"list") | (b"builtins", b"list") => Value::Global(Global::List),
976 (b"__builtin__", b"bytearray") | (b"builtins", b"bytearray") => {
977 Value::Global(Global::Bytearray)
978 }
979 (b"__builtin__", b"int") | (b"builtins", b"int") => Value::Global(Global::Int),
980 _ => Value::Global(Global::Other),
981 };
982 Ok(value)
983 }
984
985 fn reduce_global(&mut self, global: Value, mut argtuple: Vec<Value>) -> Result<()> {
987 match global {
988 Value::Global(Global::Set) => match self.resolve(argtuple.pop()) {
989 Some(Value::List(items)) => {
990 self.stack.push(Value::Set(items));
991 Ok(())
992 }
993 _ => self.error(ErrorCode::InvalidValue("set() arg".into())),
994 },
995 Value::Global(Global::Frozenset) => match self.resolve(argtuple.pop()) {
996 Some(Value::List(items)) => {
997 self.stack.push(Value::FrozenSet(items));
998 Ok(())
999 }
1000 _ => self.error(ErrorCode::InvalidValue("frozenset() arg".into())),
1001 },
1002 Value::Global(Global::Bytearray) => {
1003 argtuple.truncate(1);
1005 match self.resolve(argtuple.pop()) {
1006 Some(Value::Bytes(bytes)) => {
1007 self.stack.push(Value::Bytes(bytes));
1008 Ok(())
1009 }
1010 Some(Value::String(string)) => {
1011 self.stack.push(Value::Bytes(
1014 string.chars().map(|ch| ch as u32 as u8).collect(),
1015 ));
1016 Ok(())
1017 }
1018 _ => self.error(ErrorCode::InvalidValue("bytearray() arg".into())),
1019 }
1020 }
1021 Value::Global(Global::List) => match self.resolve(argtuple.pop()) {
1022 Some(Value::List(items)) => {
1023 self.stack.push(Value::List(items));
1024 Ok(())
1025 }
1026 _ => self.error(ErrorCode::InvalidValue("list() arg".into())),
1027 },
1028 Value::Global(Global::Int) => match self.resolve(argtuple.pop()) {
1029 Some(Value::Int(integer)) => {
1030 self.stack.push(Value::Int(integer));
1031 Ok(())
1032 }
1033 _ => self.error(ErrorCode::InvalidValue("int() arg".into())),
1034 },
1035 Value::Global(Global::Encode) => {
1036 match self.resolve(argtuple.pop()) {
1038 Some(Value::String(_)) => {}
1040 _ => return self.error(ErrorCode::InvalidValue("encode() arg".into())),
1041 }
1042 match self.resolve(argtuple.pop()) {
1043 Some(Value::String(s)) => {
1044 let bytes = s.chars().map(|ch| ch as u8).collect();
1048 self.stack.push(Value::Bytes(bytes));
1049 Ok(())
1050 }
1051 _ => self.error(ErrorCode::InvalidValue("encode() arg".into())),
1052 }
1053 }
1054 Value::Global(Global::Other) => {
1055 self.stack.push(Value::Global(Global::Other));
1059 Ok(())
1060 }
1061 other => Self::stack_error("global reference", &other, self.pos),
1062 }
1063 }
1064
1065 fn stack_error<T>(what: &'static str, value: &Value, pos: usize) -> Result<T> {
1066 let it = format!("{:?}", value);
1067 Err(Error::Eval(ErrorCode::InvalidStackTop(what, it), pos))
1068 }
1069
1070 fn error<T>(&self, reason: ErrorCode) -> Result<T> {
1071 Err(Error::Eval(reason, self.pos))
1072 }
1073
1074 fn convert_value(&mut self, value: Value) -> Result<value::Value> {
1075 match value {
1076 Value::None => Ok(value::Value::None),
1077 Value::Bool(v) => Ok(value::Value::Bool(v)),
1078 Value::I64(v) => Ok(value::Value::I64(v)),
1079 Value::Int(v) => {
1080 if let Some(i) = v.to_i64() {
1081 Ok(value::Value::I64(i))
1082 } else {
1083 Ok(value::Value::Int(v))
1084 }
1085 }
1086 Value::F64(v) => Ok(value::Value::F64(v)),
1087 Value::Bytes(v) => Ok(value::Value::Bytes(v)),
1088 Value::String(v) => Ok(value::Value::String(v)),
1089 Value::List(v) => {
1090 let new = v
1091 .into_iter()
1092 .map(|v| self.convert_value(v))
1093 .collect::<Result<_>>();
1094 Ok(value::Value::List(new?))
1095 }
1096 Value::Tuple(v) => {
1097 let new = v
1098 .into_iter()
1099 .map(|v| self.convert_value(v))
1100 .collect::<Result<_>>();
1101 Ok(value::Value::Tuple(new?))
1102 }
1103 Value::Set(v) => {
1104 let new = v
1105 .into_iter()
1106 .map(|v| self.convert_value(v).and_then(|rv| rv.into_hashable()))
1107 .collect::<Result<_>>();
1108 Ok(value::Value::Set(new?))
1109 }
1110 Value::FrozenSet(v) => {
1111 let new = v
1112 .into_iter()
1113 .map(|v| self.convert_value(v).and_then(|rv| rv.into_hashable()))
1114 .collect::<Result<_>>();
1115 Ok(value::Value::FrozenSet(new?))
1116 }
1117 Value::Dict(v) => {
1118 let mut map = BTreeMap::new();
1119 for (key, value) in v {
1120 let real_key = self.convert_value(key).and_then(|rv| rv.into_hashable())?;
1121 let real_value = self.convert_value(value)?;
1122 map.insert(real_key, real_value);
1123 }
1124 Ok(value::Value::Dict(map))
1125 }
1126 Value::MemoRef(memo_id) => {
1127 self.resolve_recursive(memo_id, (), |slf, (), value| slf.convert_value(value))
1128 }
1129 Value::Global(g) => Ok(value::Value::Global(format!("{:?}", g))),
1130 Value::PersId(id) => Ok(value::Value::PersId(Box::new(self.convert_value(*id)?))),
1131 Value::BinPersId(id) => Ok(value::Value::BinPersId(Box::new(self.convert_value(*id)?))),
1132 }
1133 }
1134}
1135
1136impl<'de: 'a, 'a, R: Read> de::Deserializer<'de> for &'a mut Deserializer<R> {
1137 type Error = Error;
1138
1139 fn deserialize_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
1140 let value = self.get_next_value()?;
1141 match value {
1142 Value::None => visitor.visit_unit(),
1143 Value::Bool(v) => visitor.visit_bool(v),
1144 Value::I64(v) => visitor.visit_i64(v),
1145 Value::Int(v) => {
1146 if let Some(i) = v.to_i64() {
1147 visitor.visit_i64(i)
1148 } else {
1149 Err(Error::Syntax(ErrorCode::InvalidValue(
1150 "integer too large".into(),
1151 )))
1152 }
1153 }
1154 Value::F64(v) => visitor.visit_f64(v),
1155 Value::Bytes(v) => visitor.visit_byte_buf(v),
1156 Value::String(v) => visitor.visit_string(v),
1157 Value::List(v) => {
1158 let len = v.len();
1159 visitor.visit_seq(SeqAccess {
1160 de: self,
1161 iter: v.into_iter(),
1162 len,
1163 })
1164 }
1165 Value::Tuple(v) => visitor.visit_seq(SeqAccess {
1166 len: v.len(),
1167 iter: v.into_iter(),
1168 de: self,
1169 }),
1170 Value::Set(v) | Value::FrozenSet(v) => visitor.visit_seq(SeqAccess {
1171 de: self,
1172 len: v.len(),
1173 iter: v.into_iter(),
1174 }),
1175 Value::Dict(v) => {
1176 let len = v.len();
1177 visitor.visit_map(MapAccess {
1178 de: self,
1179 iter: v.into_iter(),
1180 value: None,
1181 len,
1182 })
1183 }
1184 Value::MemoRef(memo_id) => {
1185 self.resolve_recursive(memo_id, visitor, |slf, visitor, value| {
1186 slf.value = Some(value);
1187 slf.deserialize_any(visitor)
1188 })
1189 }
1190 Value::Global(_) => {
1191 if self.options.replace_unresolved_globals {
1192 visitor.visit_unit()
1193 } else {
1194 Err(Error::Syntax(ErrorCode::UnresolvedGlobal))
1195 }
1196 }
1197 Value::PersId(_) => todo!("persid"),
1198 Value::BinPersId(_) => todo!("binpersid"),
1199 }
1200 }
1201
1202 #[inline]
1203 fn deserialize_option<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
1204 let value = self.get_next_value()?;
1205 match value {
1206 Value::None => visitor.visit_none(),
1207 _ => {
1208 self.value = Some(value);
1209 visitor.visit_some(self)
1210 }
1211 }
1212 }
1213
1214 #[inline]
1215 fn deserialize_newtype_struct<V: Visitor<'de>>(
1216 self,
1217 _name: &'static str,
1218 visitor: V,
1219 ) -> Result<V::Value> {
1220 visitor.visit_newtype_struct(self)
1221 }
1222
1223 #[inline]
1224 fn deserialize_enum<V: Visitor<'de>>(
1225 self,
1226 _name: &'static str,
1227 _variants: &'static [&'static str],
1228 visitor: V,
1229 ) -> Result<V::Value> {
1230 visitor.visit_enum(VariantAccess { de: self })
1231 }
1232
1233 forward_to_deserialize_any! {
1234 bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
1235 bytes byte_buf map tuple_struct struct identifier
1236 tuple ignored_any unit_struct
1237 }
1238}
1239
/// Enum accessor handed to serde visitors; works directly on the deserializer.
struct VariantAccess<'a, R: Read + 'a> {
    /// The deserializer whose pending value carries the variant and its payload.
    de: &'a mut Deserializer<R>,
}
1243
1244impl<'de: 'a, 'a, R: Read + 'a> de::EnumAccess<'de> for VariantAccess<'a, R> {
1245 type Error = Error;
1246 type Variant = Self;
1247
1248 fn variant_seed<V: de::DeserializeSeed<'de>>(self, seed: V) -> Result<(V::Value, Self)> {
1249 let value = self.de.get_next_value()?;
1250 match value {
1251 Value::Tuple(mut v) => {
1252 if v.len() == 2 {
1253 let args = v.pop();
1254 self.de.value = v.pop();
1255 let val = seed.deserialize(&mut *self.de)?;
1256 self.de.value = args;
1257 Ok((val, self))
1258 } else {
1259 self.de.value = v.pop();
1260 let val = seed.deserialize(&mut *self.de)?;
1261 Ok((val, self))
1262 }
1263 }
1264 Value::Dict(mut v) => {
1265 if v.len() != 1 {
1266 Err(Error::Syntax(ErrorCode::Structure(
1267 "enum variants must \
1268 have one dict entry"
1269 .into(),
1270 )))
1271 } else {
1272 let (name, args) = v.pop().unwrap();
1273 self.de.value = Some(name);
1274 let val = seed.deserialize(&mut *self.de)?;
1275 self.de.value = Some(args);
1276 Ok((val, self))
1277 }
1278 }
1279 Value::MemoRef(memo_id) => {
1280 self.de.resolve_recursive(memo_id, (), |slf, (), value| {
1281 slf.value = Some(value);
1282 Ok(())
1283 })?;
1284 self.variant_seed(seed)
1286 }
1287 s @ Value::String(_) => {
1288 self.de.value = Some(s);
1289 let val = seed.deserialize(&mut *self.de)?;
1290 Ok((val, self))
1291 }
1292 _ => Err(Error::Syntax(ErrorCode::Structure(
1293 "enums must be represented as \
1294 dicts or tuples"
1295 .into(),
1296 ))),
1297 }
1298 }
1299}
1300
1301impl<'de: 'a, 'a, R: Read + 'a> de::VariantAccess<'de> for VariantAccess<'a, R> {
1302 type Error = Error;
1303
1304 fn unit_variant(self) -> Result<()> {
1305 Ok(())
1306 }
1307
1308 fn newtype_variant_seed<T: de::DeserializeSeed<'de>>(self, seed: T) -> Result<T::Value> {
1309 seed.deserialize(self.de)
1310 }
1311
1312 fn tuple_variant<V: Visitor<'de>>(self, _len: usize, visitor: V) -> Result<V::Value> {
1313 de::Deserializer::deserialize_any(self.de, visitor)
1314 }
1315
1316 fn struct_variant<V: Visitor<'de>>(
1317 self,
1318 _fields: &'static [&'static str],
1319 visitor: V,
1320 ) -> Result<V::Value> {
1321 de::Deserializer::deserialize_any(self.de, visitor)
1322 }
1323}
1324
/// Sequence accessor over the items of an already decoded list, tuple or set.
struct SeqAccess<'a, R: Read + 'a> {
    /// Deserializer used to decode each item.
    de: &'a mut Deserializer<R>,
    /// Remaining items.
    iter: vec::IntoIter<Value>,
    /// Number of items not yet handed out; reported as the size hint.
    len: usize,
}
1330
1331impl<'de: 'a, 'a, R: Read> de::SeqAccess<'de> for SeqAccess<'a, R> {
1332 type Error = Error;
1333
1334 fn next_element_seed<T: de::DeserializeSeed<'de>>(
1335 &mut self,
1336 seed: T,
1337 ) -> Result<Option<T::Value>> {
1338 match self.iter.next() {
1339 Some(value) => {
1340 self.len -= 1;
1341 self.de.value = Some(value);
1342 Ok(Some(seed.deserialize(&mut *self.de)?))
1343 }
1344 None => Ok(None),
1345 }
1346 }
1347
1348 fn size_hint(&self) -> Option<usize> {
1349 Some(self.len)
1350 }
1351}
1352
/// Map accessor over the key/value pairs of an already decoded dict.
struct MapAccess<'a, R: Read + 'a> {
    /// Deserializer used to decode keys and values.
    de: &'a mut Deserializer<R>,
    /// Remaining key/value pairs.
    iter: vec::IntoIter<(Value, Value)>,
    /// Value belonging to the key handed out last, until it is requested.
    value: Option<Value>,
    /// Number of pairs not yet handed out; reported as the size hint.
    len: usize,
}
1359
1360impl<'de: 'a, 'a, R: Read> de::MapAccess<'de> for MapAccess<'a, R> {
1361 type Error = Error;
1362
1363 fn next_key_seed<T: de::DeserializeSeed<'de>>(&mut self, seed: T) -> Result<Option<T::Value>> {
1364 match self.iter.next() {
1365 Some((key, value)) => {
1366 self.len -= 1;
1367 self.value = Some(value);
1368 self.de.value = Some(key);
1369 Ok(Some(seed.deserialize(&mut *self.de)?))
1370 }
1371 None => Ok(None),
1372 }
1373 }
1374
1375 fn next_value_seed<T: de::DeserializeSeed<'de>>(&mut self, seed: T) -> Result<T::Value> {
1376 let value = self.value.take().unwrap();
1377 self.de.value = Some(value);
1378 seed.deserialize(&mut *self.de)
1379 }
1380
1381 fn size_hint(&self) -> Option<usize> {
1382 Some(self.len)
1383 }
1384}
1385
1386pub fn from_reader<'de, R: io::Read, T: de::Deserialize<'de>>(
1388 rdr: R,
1389 options: DeOptions,
1390) -> Result<T> {
1391 let mut de = Deserializer::new(rdr, options);
1392 let value = de::Deserialize::deserialize(&mut de)?;
1393 de.end()?;
1395 Ok(value)
1396}
1397
1398pub fn from_slice<'de, T: de::Deserialize<'de>>(v: &[u8], options: DeOptions) -> Result<T> {
1400 from_reader(io::Cursor::new(v), options)
1401}
1402
1403pub fn from_iter<'de, E, I, T>(it: I, options: DeOptions) -> Result<T>
1405where
1406 E: IterReadItem,
1407 I: FusedIterator<Item = E>,
1408 T: de::Deserialize<'de>,
1409{
1410 from_reader(IterRead::new(it), options)
1411}
1412
1413pub fn value_from_reader<R: io::Read>(rdr: R, options: DeOptions) -> Result<value::Value> {
1415 let mut de = Deserializer::new(rdr, options);
1416 let value = de.deserialize_value()?;
1417 de.end()?;
1418 Ok(value)
1419}
1420
1421pub fn value_from_slice(v: &[u8], options: DeOptions) -> Result<value::Value> {
1423 value_from_reader(io::Cursor::new(v), options)
1424}
1425
1426pub fn value_from_iter<E, I>(it: I, options: DeOptions) -> Result<value::Value>
1428where
1429 E: IterReadItem,
1430 I: FusedIterator<Item = E>,
1431{
1432 value_from_reader(IterRead::new(it), options)
1433}