1use byteorder::{BigEndian, ByteOrder, LittleEndian};
14use iter_read::{IterRead, IterReadItem};
15use num_bigint::{BigInt, Sign};
16use num_traits::ToPrimitive;
17use serde::de::Visitor;
18use serde::{de, forward_to_deserialize_any};
19use std::char;
20use std::collections::BTreeMap;
21use std::io;
22use std::io::{BufRead, BufReader, Read};
23use std::iter::FusedIterator;
24use std::mem;
25use std::str;
26use std::str::FromStr;
27use std::vec;
28
29use super::consts::*;
30use super::error::{Error, ErrorCode, Result};
31use super::value;
32
/// Key under which a value is stored in the deserializer's memo table.
type MemoId = u32;
34
/// Global references (module + name pairs) that the deserializer can
/// distinguish; everything it does not know is collapsed into `Other`.
#[derive(Clone, Debug, PartialEq)]
enum Global {
    /// The `set` builtin.
    Set,
    /// The `frozenset` builtin.
    Frozenset,
    /// The `bytearray` builtin.
    Bytearray,
    /// The `list` builtin.
    List,
    /// The `int` builtin.
    Int,
    /// `_codecs.encode`.
    Encode,
    /// `copy_reg._reconstructor` / `copyreg._reconstructor`.
    Reconst,
    /// Any global that is not one of the above.
    Other,
}
46
/// Internal value representation used while the opcode stream is being
/// interpreted.
///
/// Besides plain data it can hold memo references and global references,
/// which are resolved or rejected when converting to the public value type.
#[derive(Clone, Debug, PartialEq)]
enum Value {
    /// Reference to an entry of the memo table.
    MemoRef(MemoId),
    /// Reference to a (recognized or unrecognized) global.
    Global(Global),
    /// The `None` singleton.
    None,
    /// A boolean.
    Bool(bool),
    /// An integer that fits into 64 bits.
    I64(i64),
    /// An arbitrary-precision integer.
    Int(BigInt),
    /// A floating-point number.
    F64(f64),
    /// A byte string.
    Bytes(Vec<u8>),
    /// A text string.
    String(String),
    /// A list of values.
    List(Vec<Value>),
    /// A tuple of values.
    Tuple(Vec<Value>),
    /// A set, stored as the sequence of its items.
    Set(Vec<Value>),
    /// A frozen set, stored as the sequence of its items.
    FrozenSet(Vec<Value>),
    /// A dictionary, stored as a sequence of key/value pairs.
    Dict(Vec<(Value, Value)>),
}
73
/// Options that tune how a pickle stream is decoded.
///
/// Every option is off by default. Each builder method consumes the options
/// and returns a copy with one option switched on, so calls can be chained:
/// `DeOptions::new().decode_strings().keep_restore_state()`.
#[derive(Clone, Debug, Default)]
pub struct DeOptions {
    /// Decode byte strings produced by the `STRING` family of opcodes as
    /// UTF-8 text instead of keeping them as raw bytes.
    decode_strings: bool,
    /// Keep the constructor arguments / state of unpickled objects instead
    /// of replacing them with an empty dict.
    keep_restore_state: bool,
    /// Replace globals that cannot be resolved with `None` instead of
    /// failing.
    replace_unresolved_globals: bool,
    /// Replace recursive references with `None` instead of failing.
    replace_recursive_structures: bool,
}

impl DeOptions {
    /// Returns options with every flag switched off.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Enables decoding of byte strings as UTF-8 text.
    ///
    /// The updated options are returned; ignoring the return value has no
    /// effect, hence `#[must_use]`.
    #[must_use]
    pub fn decode_strings(self) -> Self {
        Self { decode_strings: true, ..self }
    }

    /// Enables keeping the restore state of unpickled objects.
    #[must_use]
    pub fn keep_restore_state(self) -> Self {
        Self { keep_restore_state: true, ..self }
    }

    /// Enables replacing unresolved globals with `None`.
    #[must_use]
    pub fn replace_unresolved_globals(self) -> Self {
        Self { replace_unresolved_globals: true, ..self }
    }

    /// Enables replacing recursive structures with `None`.
    #[must_use]
    pub fn replace_recursive_structures(self) -> Self {
        Self { replace_recursive_structures: true, ..self }
    }
}
119
/// Decodes pickle data from a reader.
pub struct Deserializer<R: Read> {
    /// Buffered source of the pickle bytes.
    rdr: BufReader<R>,
    /// Decoding options.
    options: DeOptions,
    /// Number of bytes consumed so far; attached to evaluation errors.
    pos: usize,
    /// Value handed over for the next deserialization step; when empty, the
    /// next value is parsed from the reader.
    value: Option<Value>,
    /// Memo table: memoized value plus a reference counter.
    memo: BTreeMap<MemoId, (Value, i32)>,
    /// The current value stack.
    stack: Vec<Value>,
    /// Stacks that were set aside by `MARK` opcodes.
    stacks: Vec<Vec<Value>>,
}
130
131impl<R: Read> Deserializer<R> {
132 pub fn new(rdr: R, options: DeOptions) -> Deserializer<R> {
134 Deserializer {
135 rdr: BufReader::new(rdr),
136 pos: 0,
137 value: None,
138 memo: BTreeMap::new(),
139 stack: Vec::with_capacity(128),
140 stacks: Vec::with_capacity(16),
141 options,
142 }
143 }
144
/// Removes every entry from the memo table.
pub fn reset_memo(&mut self) {
    self.memo.clear();
}
197
198 pub fn deserialize_value(&mut self) -> Result<value::Value> {
202 let internal_value = self.parse_value()?;
203 self.convert_value(internal_value)
204 }
205
206 fn get_next_value(&mut self) -> Result<Value> {
209 match self.value.take() {
210 Some(v) => Ok(v),
211 None => self.parse_value(),
212 }
213 }
214
/// Interprets opcodes from the reader until a `STOP` opcode is seen and
/// returns the value popped from the top of the stack at that point.
///
/// Each iteration reads one opcode byte and applies its effect to the
/// current stack, the saved mark stacks, or the memo table.
///
/// # Errors
/// Fails on I/O errors, premature end of input, stack underflow, malformed
/// literals, missing memo entries, and on any opcode that is not handled
/// below (`ErrorCode::Unsupported`).
fn parse_value(&mut self) -> Result<Value> {
    loop {
        match self.read_byte()? {
            // --- Framing and stack housekeeping ---

            // The byte following PROTO is read and ignored.
            PROTO => {
                self.read_byte()?;
            }
            // The 8 bytes following FRAME are read and ignored.
            FRAME => {
                self.read_fixed_8_bytes()?;
            }
            // End of the pickle: the stack top is the result.
            STOP => return self.pop(),
            // Set the current stack aside and continue with a fresh one.
            MARK => {
                let stack = mem::replace(&mut self.stack, Vec::with_capacity(128));
                self.stacks.push(stack);
            }
            // With an empty current stack, POP discards the mark instead.
            POP => {
                if self.stack.is_empty() {
                    self.pop_mark()?;
                } else {
                    self.pop()?;
                }
            }
            POP_MARK => {
                self.pop_mark()?;
            }
            // `top()` looks through memo references, so the clone that is
            // pushed is the referenced value itself.
            DUP => {
                let top = self.top()?.clone();
                self.stack.push(top);
            }

            // --- Storing into the memo ---

            // Memo id given as a newline-terminated decimal literal.
            PUT => {
                let bytes = self.read_line()?;
                let memo_id = self.parse_ascii(bytes)?;
                self.memoize(memo_id)?;
            }
            // Memo id given as one byte.
            BINPUT => {
                let memo_id = self.read_byte()?;
                self.memoize(memo_id.into())?;
            }
            // Memo id given as a little-endian u32.
            LONG_BINPUT => {
                let bytes = self.read_fixed_4_bytes()?;
                let memo_id = LittleEndian::read_u32(&bytes);
                self.memoize(memo_id)?;
            }
            // Memo id is the current number of memo entries.
            MEMOIZE => {
                let memo_id = self.memo.len();
                self.memoize(memo_id as MemoId)?;
            }

            // --- Fetching from the memo (pushes a reference) ---

            GET => {
                let bytes = self.read_line()?;
                let memo_id = self.parse_ascii(bytes)?;
                self.push_memo_ref(memo_id)?;
            }
            BINGET => {
                let memo_id = self.read_byte()?;
                self.push_memo_ref(memo_id.into())?;
            }
            LONG_BINGET => {
                let bytes = self.read_fixed_4_bytes()?;
                let memo_id = LittleEndian::read_u32(&bytes);
                self.push_memo_ref(memo_id)?;
            }

            // --- Singletons ---

            NONE => self.stack.push(Value::None),
            NEWFALSE => self.stack.push(Value::Bool(false)),
            NEWTRUE => self.stack.push(Value::Bool(true)),

            // --- Text-encoded numbers (newline-terminated) ---

            INT => {
                let line = self.read_line()?;
                let val = self.decode_text_int(line)?;
                self.stack.push(val);
            }
            LONG => {
                let line = self.read_line()?;
                let long = self.decode_text_long(line)?;
                self.stack.push(long);
            }
            FLOAT => {
                let line = self.read_line()?;
                let f = self.parse_ascii(line)?;
                self.stack.push(Value::F64(f));
            }

            // --- Text-encoded strings (newline-terminated, escaped) ---

            STRING => {
                let line = self.read_line()?;
                let string = self.decode_escaped_string(&line)?;
                self.stack.push(string);
            }
            UNICODE => {
                let line = self.read_line()?;
                let string = self.decode_escaped_unicode(&line)?;
                self.stack.push(string);
            }

            // --- Binary-encoded numbers ---

            // Big-endian 8-byte float.
            BINFLOAT => {
                let bytes = self.read_fixed_8_bytes()?;
                self.stack.push(Value::F64(BigEndian::read_f64(&bytes)));
            }
            // Little-endian signed 32-bit integer.
            BININT => {
                let bytes = self.read_fixed_4_bytes()?;
                self.stack.push(Value::I64(LittleEndian::read_i32(&bytes).into()));
            }
            // Unsigned 8-bit integer.
            BININT1 => {
                let byte = self.read_byte()?;
                self.stack.push(Value::I64(byte.into()));
            }
            // Little-endian unsigned 16-bit integer.
            BININT2 => {
                let bytes = self.read_fixed_2_bytes()?;
                self.stack.push(Value::I64(LittleEndian::read_u16(&bytes).into()));
            }
            // Length-prefixed big integers (1-byte / signed 4-byte length).
            LONG1 => {
                let bytes = self.read_u8_prefixed_bytes()?;
                let long = self.decode_binary_long(bytes);
                self.stack.push(long);
            }
            LONG4 => {
                let bytes = self.read_i32_prefixed_bytes()?;
                let long = self.decode_binary_long(bytes);
                self.stack.push(long);
            }

            // --- Length-prefixed bytes and strings ---

            SHORT_BINBYTES => {
                let string = self.read_u8_prefixed_bytes()?;
                self.stack.push(Value::Bytes(string));
            }
            BINBYTES => {
                let string = self.read_u32_prefixed_bytes()?;
                self.stack.push(Value::Bytes(string));
            }
            BINBYTES8 => {
                let string = self.read_u64_prefixed_bytes()?;
                self.stack.push(Value::Bytes(string));
            }
            // The *STRING opcodes honour the `decode_strings` option.
            SHORT_BINSTRING => {
                let string = self.read_u8_prefixed_bytes()?;
                let decoded = self.decode_string(string)?;
                self.stack.push(decoded);
            }
            BINSTRING => {
                let string = self.read_i32_prefixed_bytes()?;
                let decoded = self.decode_string(string)?;
                self.stack.push(decoded);
            }
            // The *UNICODE opcodes always require valid UTF-8.
            SHORT_BINUNICODE => {
                let string = self.read_u8_prefixed_bytes()?;
                let decoded = self.decode_unicode(string)?;
                self.stack.push(decoded);
            }
            BINUNICODE => {
                let string = self.read_u32_prefixed_bytes()?;
                let decoded = self.decode_unicode(string)?;
                self.stack.push(decoded);
            }
            BINUNICODE8 => {
                let string = self.read_u64_prefixed_bytes()?;
                let decoded = self.decode_unicode(string)?;
                self.stack.push(decoded);
            }
            // Byte arrays are represented as plain bytes.
            BYTEARRAY8 => {
                let string = self.read_u64_prefixed_bytes()?;
                self.stack.push(Value::Bytes(string));
            }

            // --- Tuples ---

            EMPTY_TUPLE => self.stack.push(Value::Tuple(Vec::new())),
            TUPLE1 => {
                let item = self.pop()?;
                self.stack.push(Value::Tuple(vec![item]));
            }
            // Items are popped in reverse order of their position.
            TUPLE2 => {
                let item2 = self.pop()?;
                let item1 = self.pop()?;
                self.stack.push(Value::Tuple(vec![item1, item2]));
            }
            TUPLE3 => {
                let item3 = self.pop()?;
                let item2 = self.pop()?;
                let item1 = self.pop()?;
                self.stack.push(Value::Tuple(vec![item1, item2, item3]));
            }
            // Everything since the last mark becomes the tuple.
            TUPLE => {
                let items = self.pop_mark()?;
                self.stack.push(Value::Tuple(items));
            }

            // --- Lists ---

            EMPTY_LIST => self.stack.push(Value::List(Vec::new())),
            LIST => {
                let items = self.pop_mark()?;
                self.stack.push(Value::List(items));
            }
            // Append to the list that is on top of the stack afterwards.
            APPEND => {
                let value = self.pop()?;
                self.modify_list(|list| list.push(value))?;
            }
            APPENDS => {
                let items = self.pop_mark()?;
                self.modify_list(|list| list.extend(items))?;
            }

            // --- Dicts ---

            EMPTY_DICT => self.stack.push(Value::Dict(Vec::new())),
            // Items since the mark alternate key, value, key, value, ...
            DICT => {
                let items = self.pop_mark()?;
                let mut dict = Vec::with_capacity(items.len() / 2);
                Self::extend_dict(&mut dict, items);
                self.stack.push(Value::Dict(dict));
            }
            SETITEM => {
                let value = self.pop()?;
                let key = self.pop()?;
                self.modify_dict(|dict| dict.push((key, value)))?;
            }
            SETITEMS => {
                let items = self.pop_mark()?;
                self.modify_dict(|dict| Self::extend_dict(dict, items))?;
            }

            // --- Sets ---

            EMPTY_SET => self.stack.push(Value::Set(Vec::new())),
            FROZENSET => {
                let items = self.pop_mark()?;
                self.stack.push(Value::FrozenSet(items));
            }
            ADDITEMS => {
                let items = self.pop_mark()?;
                self.modify_set(|set| set.extend(items))?;
            }

            // --- Globals and calls ---

            // Module and global name are given as two lines.
            GLOBAL => {
                let modname = self.read_line()?;
                let globname = self.read_line()?;
                let value = self.decode_global(modname, globname)?;
                self.stack.push(value);
            }
            // Module and global name are strings on the stack; the global
            // name is on top.
            STACK_GLOBAL => {
                let globname = match self.pop_resolve()? {
                    Value::String(string) => string.into_bytes(),
                    other => return Self::stack_error("string", &other, self.pos),
                };
                let modname = match self.pop_resolve()? {
                    Value::String(string) => string.into_bytes(),
                    other => return Self::stack_error("string", &other, self.pos),
                };
                let value = self.decode_global(modname, globname)?;
                self.stack.push(value);
            }
            // Argument tuple on top, callable below it.
            REDUCE => {
                let argtuple = match self.pop_resolve()? {
                    Value::Tuple(args) => args,
                    other => return Self::stack_error("tuple", &other, self.pos),
                };
                let global = self.pop_resolve()?;
                self.reduce_global(global, argtuple)?;
            }

            // --- Object construction ---
            // No objects are built: depending on `keep_restore_state`, the
            // arguments are kept or an empty dict is pushed instead.

            INST => {
                // Two lines are read and discarded.
                for _ in 0..2 {
                    self.read_line()?;
                }
                let args = self.pop_mark()?;
                if self.options.keep_restore_state {
                    self.stack.push(Value::Tuple(args));
                } else {
                    self.stack.push(Value::Dict(Vec::new()));
                }
            }
            OBJ => {
                let args = self.pop_mark()?;
                // One more value is popped and discarded.
                self.pop()?;
                if self.options.keep_restore_state {
                    self.stack.push(Value::Tuple(args));
                } else {
                    self.stack.push(Value::Dict(Vec::new()));
                }
            }
            NEWOBJ => {
                let args = self.pop()?;
                // The value below the arguments is discarded.
                self.pop()?;
                if self.options.keep_restore_state {
                    self.stack.push(args);
                } else {
                    self.stack.push(Value::Dict(Vec::new()));
                }
            }
            NEWOBJ_EX => {
                let kwargs = self.pop()?;
                let args = self.pop()?;
                // The value below the arguments is discarded.
                self.pop()?;
                if self.options.keep_restore_state {
                    self.stack.push(Value::Tuple(vec![args, kwargs]));
                } else {
                    self.stack.push(Value::Dict(Vec::new()));
                }
            }
            // The state replaces the value it would be applied to.
            BUILD => {
                let state = self.pop()?;
                self.pop()?;
                self.stack.push(state);
            }

            // Anything else is reported as unsupported.
            code => return self.error(ErrorCode::Unsupported(code as char)),
        }
    }
}
546
547 fn pop(&mut self) -> Result<Value> {
549 match self.stack.pop() {
550 Some(v) => Ok(v),
551 None => self.error(ErrorCode::StackUnderflow),
552 }
553 }
554
555 fn pop_resolve(&mut self) -> Result<Value> {
557 let top = self.stack.pop();
558 match top {
559 Some(v) => self.resolve(v),
560 None => self.error(ErrorCode::StackUnderflow),
561 }
562 }
563
564 fn pop_mark(&mut self) -> Result<Vec<Value>> {
566 match self.stacks.pop() {
567 Some(new) => Ok(mem::replace(&mut self.stack, new)),
568 None => self.error(ErrorCode::StackUnderflow),
569 }
570 }
571
572 fn top(&mut self) -> Result<&mut Value> {
574 match self.stack.last_mut() {
575 Some(&mut Value::MemoRef(n)) => self
579 .memo
580 .get_mut(&n)
581 .map(|&mut (ref mut v, _)| v)
582 .ok_or(Error::Syntax(ErrorCode::MissingMemo(n))),
583 Some(other_value) => Ok(other_value),
584 None => Err(Error::Eval(ErrorCode::StackUnderflow, self.pos)),
585 }
586 }
587
588 fn push_memo_ref(&mut self, memo_id: MemoId) -> Result<()> {
590 self.stack.push(Value::MemoRef(memo_id));
591 match self.memo.get_mut(&memo_id) {
592 Some(&mut (_, ref mut count)) => {
593 *count += 1;
594 Ok(())
595 }
596 None => Err(Error::Eval(ErrorCode::MissingMemo(memo_id), self.pos)),
597 }
598 }
599
600 fn memoize(&mut self, memo_id: MemoId) -> Result<()> {
603 let mut item = self.pop()?;
604 if let Value::MemoRef(id) = item {
605 item = match self.memo.get(&id) {
607 Some((v, _)) => v.clone(),
608 None => return Err(Error::Eval(ErrorCode::MissingMemo(id), self.pos)),
609 };
610 }
611 self.memo.insert(memo_id, (item, 1));
612 self.stack.push(Value::MemoRef(memo_id));
613 Ok(())
614 }
615
616 fn resolve(&mut self, memo: Value) -> Result<Value> {
618 match memo {
619 Value::MemoRef(id) => match self.memo.get_mut(&id) {
620 None => Err(Error::Eval(ErrorCode::MissingMemo(id), self.pos)),
621 Some(&mut (ref val, ref mut count)) => {
622 *count -= 1;
626 Ok(val.clone())
627 }
628 },
629 other => Ok(other),
630 }
631 }
632
633 fn resolve_recursive<T, U, F>(&mut self, id: MemoId, u: U, f: F) -> Result<T>
635 where
636 F: FnOnce(&mut Self, U, Value) -> Result<T>,
637 {
638 let (value, mut count) = match self.memo.remove(&id) {
642 Some(entry) => entry,
643 None => {
644 return {
645 if self.options.replace_recursive_structures {
646 f(self, u, Value::None)
647 } else {
648 Err(Error::Syntax(ErrorCode::Recursive))
649 }
650 }
651 }
652 };
653 count -= 1;
654 if count <= 0 {
655 f(self, u, value)
656 } else {
658 let result = f(self, u, value.clone());
659 self.memo.insert(id, (value, count));
660 result
661 }
662 }
663
664 pub fn end(&mut self) -> Result<()> {
666 let mut buf = [0];
667 match self.rdr.read(&mut buf) {
668 Err(err) => Err(Error::Io(err)),
669 Ok(1) => self.error(ErrorCode::TrailingBytes),
670 _ => Ok(()),
671 }
672 }
673
674 fn read_line(&mut self) -> Result<Vec<u8>> {
675 let mut buf = Vec::with_capacity(16);
676 match self.rdr.read_until(b'\n', &mut buf) {
677 Ok(_) => {
678 self.pos += buf.len();
679 buf.pop(); if buf.last() == Some(&b'\r') {
681 buf.pop();
682 }
683 Ok(buf)
684 }
685 Err(err) => Err(Error::Io(err)),
686 }
687 }
688
689 #[inline]
690 fn read_byte(&mut self) -> Result<u8> {
691 let mut buf = [0];
692 match self.rdr.read(&mut buf) {
693 Ok(1) => {
694 self.pos += 1;
695 Ok(buf[0])
696 }
697 Ok(_) => self.error(ErrorCode::EOFWhileParsing),
698 Err(err) => Err(Error::Io(err)),
699 }
700 }
701
702 #[inline]
703 fn read_bytes(&mut self, n: usize) -> Result<Vec<u8>> {
704 let mut buf = Vec::new();
705 match self.rdr.by_ref().take(n as u64).read_to_end(&mut buf) {
706 Ok(m) if n == m => {
707 self.pos += n;
708 Ok(buf)
709 }
710 Ok(_) => self.error(ErrorCode::EOFWhileParsing),
711 Err(err) => Err(Error::Io(err)),
712 }
713 }
714
715 #[inline]
716 fn read_fixed_2_bytes(&mut self) -> Result<[u8; 2]> {
717 let mut buf = [0; 2];
718 match self.rdr.by_ref().take(2).read_exact(&mut buf) {
719 Ok(()) => {
720 self.pos += 2;
721 Ok(buf)
722 }
723 Err(err) => {
724 if err.kind() == std::io::ErrorKind::UnexpectedEof {
725 self.error(ErrorCode::EOFWhileParsing)
726 } else {
727 Err(Error::Io(err))
728 }
729 }
730 }
731 }
732
733 #[inline]
734 fn read_fixed_4_bytes(&mut self) -> Result<[u8; 4]> {
735 let mut buf = [0; 4];
736 match self.rdr.by_ref().take(4).read_exact(&mut buf) {
737 Ok(()) => {
738 self.pos += 4;
739 Ok(buf)
740 }
741 Err(err) => {
742 if err.kind() == std::io::ErrorKind::UnexpectedEof {
743 self.error(ErrorCode::EOFWhileParsing)
744 } else {
745 Err(Error::Io(err))
746 }
747 }
748 }
749 }
750
751 #[inline]
752 fn read_fixed_8_bytes(&mut self) -> Result<[u8; 8]> {
753 let mut buf = [0; 8];
754 match self.rdr.by_ref().take(8).read_exact(&mut buf) {
755 Ok(()) => {
756 self.pos += 8;
757 Ok(buf)
758 }
759 Err(err) => {
760 if err.kind() == std::io::ErrorKind::UnexpectedEof {
761 self.error(ErrorCode::EOFWhileParsing)
762 } else {
763 Err(Error::Io(err))
764 }
765 }
766 }
767 }
768
769 fn read_i32_prefixed_bytes(&mut self) -> Result<Vec<u8>> {
770 let lenbytes = self.read_fixed_4_bytes()?;
771 match LittleEndian::read_i32(&lenbytes) {
772 0 => Ok(vec![]),
773 l if l < 0 => self.error(ErrorCode::NegativeLength),
774 l => self.read_bytes(l as usize),
775 }
776 }
777
778 fn read_u64_prefixed_bytes(&mut self) -> Result<Vec<u8>> {
779 let lenbytes = self.read_fixed_8_bytes()?;
780 self.read_bytes(LittleEndian::read_u64(&lenbytes) as usize)
781 }
782
783 fn read_u32_prefixed_bytes(&mut self) -> Result<Vec<u8>> {
784 let lenbytes = self.read_fixed_4_bytes()?;
785 self.read_bytes(LittleEndian::read_u32(&lenbytes) as usize)
786 }
787
788 fn read_u8_prefixed_bytes(&mut self) -> Result<Vec<u8>> {
789 let lenbyte = self.read_byte()?;
790 self.read_bytes(lenbyte as usize)
791 }
792
793 fn parse_ascii<T: FromStr>(&self, bytes: Vec<u8>) -> Result<T> {
795 match str::from_utf8(&bytes).unwrap_or("").parse() {
796 Ok(v) => Ok(v),
797 Err(_) => self.error(ErrorCode::InvalidLiteral(bytes)),
798 }
799 }
800
801 fn decode_text_int(&self, line: Vec<u8>) -> Result<Value> {
803 Ok(if line == b"00" {
805 Value::Bool(false)
806 } else if line == b"01" {
807 Value::Bool(true)
808 } else {
809 let i = self.parse_ascii(line)?;
810 Value::I64(i)
811 })
812 }
813
814 fn decode_text_long(&self, mut line: Vec<u8>) -> Result<Value> {
816 if line.last() == Some(&b'L') {
818 line.pop();
819 }
820 match BigInt::parse_bytes(&line, 10) {
821 Some(i) => Ok(Value::Int(i)),
822 None => self.error(ErrorCode::InvalidLiteral(line)),
823 }
824 }
825
826 fn decode_escaped_string(&self, slice: &[u8]) -> Result<Value> {
829 let slice = if (slice.len() >= 2)
831 && (slice[0] == slice[slice.len() - 1])
832 && (slice[0] == b'"' || slice[0] == b'\'')
833 {
834 &slice[1..slice.len() - 1]
835 } else {
836 slice
837 };
838 let mut result = Vec::with_capacity(slice.len());
839 let mut iter = slice.iter();
840 while let Some(&b) = iter.next() {
841 match b {
842 b'\\' => match iter.next() {
843 Some(&b'\\') => result.push(b'\\'),
844 Some(&b'a') => result.push(b'\x07'),
845 Some(&b'b') => result.push(b'\x08'),
846 Some(&b't') => result.push(b'\x09'),
847 Some(&b'n') => result.push(b'\x0a'),
848 Some(&b'v') => result.push(b'\x0b'),
849 Some(&b'f') => result.push(b'\x0c'),
850 Some(&b'r') => result.push(b'\x0d'),
851 Some(&b'x') => {
852 match iter.next().and_then(|&ch1| (ch1 as char).to_digit(16)).and_then(|v1| {
853 iter.next()
854 .and_then(|&ch2| (ch2 as char).to_digit(16))
855 .map(|v2| 16 * (v1 as u8) + (v2 as u8))
856 }) {
857 Some(v) => result.push(v),
858 None => return self.error(ErrorCode::InvalidLiteral(slice.into())),
859 }
860 }
861 _ => return self.error(ErrorCode::InvalidLiteral(slice.into())),
862 },
863 _ => result.push(b),
864 }
865 }
866 self.decode_string(result)
867 }
868
869 fn decode_escaped_unicode(&self, s: &[u8]) -> Result<Value> {
873 let mut result = String::with_capacity(s.len());
874 let mut iter = s.iter();
875 while let Some(&b) = iter.next() {
876 match b {
877 b'\\' => {
878 let nescape = match iter.next() {
879 Some(&b'u') => 4,
880 Some(&b'U') => 8,
881 _ => return self.error(ErrorCode::InvalidLiteral(s.into())),
882 };
883 let mut accum = 0;
884 for _i in 0..nescape {
885 accum *= 16;
886 match iter.next().and_then(|&ch| (ch as char).to_digit(16)) {
887 Some(v) => accum += v,
888 None => return self.error(ErrorCode::InvalidLiteral(s.into())),
889 }
890 }
891 match char::from_u32(accum) {
892 Some(v) => result.push(v),
893 None => return self.error(ErrorCode::InvalidLiteral(s.into())),
894 }
895 }
896 _ => result.push(b as char),
897 }
898 }
899 Ok(Value::String(result))
900 }
901
902 fn decode_string(&self, string: Vec<u8>) -> Result<Value> {
904 if self.options.decode_strings {
905 self.decode_unicode(string)
906 } else {
907 Ok(Value::Bytes(string))
908 }
909 }
910
911 fn decode_unicode(&self, string: Vec<u8>) -> Result<Value> {
913 match String::from_utf8(string) {
914 Ok(v) => Ok(Value::String(v)),
915 Err(_) => self.error(ErrorCode::StringNotUTF8),
916 }
917 }
918
919 fn decode_binary_long(&self, bytes: Vec<u8>) -> Value {
921 let negative = !bytes.is_empty() && (bytes[bytes.len() - 1] & 0x80 != 0);
924 let mut val = BigInt::from_bytes_le(Sign::Plus, &bytes);
925 if negative {
926 val -= BigInt::from(1) << (bytes.len() * 8);
927 }
928 Value::Int(val)
929 }
930
931 fn modify_list<F>(&mut self, f: F) -> Result<()>
933 where
934 F: FnOnce(&mut Vec<Value>),
935 {
936 let pos = self.pos;
937 let top = self.top()?;
938 if let Value::List(ref mut list) = *top {
939 f(list);
940 Ok(())
941 } else {
942 Self::stack_error("list", top, pos)
943 }
944 }
945
946 fn extend_dict(dict: &mut Vec<(Value, Value)>, items: Vec<Value>) {
948 let mut key = None;
949 for value in items {
950 match key.take() {
951 None => key = Some(value),
952 Some(key) => dict.push((key, value)),
953 }
954 }
955 }
956
957 fn modify_dict<F>(&mut self, f: F) -> Result<()>
959 where
960 F: FnOnce(&mut Vec<(Value, Value)>),
961 {
962 let pos = self.pos;
963 let top = self.top()?;
964 if let Value::Dict(ref mut dict) = *top {
965 f(dict);
966 Ok(())
967 } else {
968 Self::stack_error("dict", top, pos)
969 }
970 }
971
972 fn modify_set<F>(&mut self, f: F) -> Result<()>
974 where
975 F: FnOnce(&mut Vec<Value>),
976 {
977 let pos = self.pos;
978 let top = self.top()?;
979 if let Value::Set(ref mut set) = *top {
980 f(set);
981 Ok(())
982 } else {
983 Self::stack_error("set", top, pos)
984 }
985 }
986
987 fn decode_global(&mut self, modname: Vec<u8>, globname: Vec<u8>) -> Result<Value> {
989 let value = match (&*modname, &*globname) {
990 (b"_codecs", b"encode") => Value::Global(Global::Encode),
991 (b"copy_reg", b"_reconstructor") | (b"copyreg", b"_reconstructor") => {
992 Value::Global(Global::Reconst)
993 }
994 (b"__builtin__", b"set") | (b"builtins", b"set") => Value::Global(Global::Set),
995 (b"__builtin__", b"frozenset") | (b"builtins", b"frozenset") => Value::Global(Global::Frozenset),
996 (b"__builtin__", b"list") | (b"builtins", b"list") => Value::Global(Global::List),
997 (b"__builtin__", b"bytearray") | (b"builtins", b"bytearray") => Value::Global(Global::Bytearray),
998 (b"__builtin__", b"int") | (b"builtins", b"int") => Value::Global(Global::Int),
999 _ => Value::Global(Global::Other),
1000 };
1001 Ok(value)
1002 }
1003
/// Emulates calling `global` with the arguments in `argtuple` and pushes
/// the result onto the stack.
///
/// Arguments are taken from the end of `argtuple` and resolved through the
/// memo if they are references.
///
/// # Errors
/// `InvalidValue` if an argument has an unexpected type, an
/// `InvalidStackTop` error if `global` is not a global reference, plus any
/// memo resolution error.
fn reduce_global(&mut self, global: Value, mut argtuple: Vec<Value>) -> Result<()> {
    match global {
        // set(list) -> set with the list's items
        Value::Global(Global::Set) => match argtuple.pop().map(|v| self.resolve(v)).transpose()? {
            Some(Value::List(items)) => {
                self.stack.push(Value::Set(items));
                Ok(())
            }
            _ => self.error(ErrorCode::InvalidValue("set() arg".into())),
        },
        // frozenset(list) -> frozen set with the list's items
        Value::Global(Global::Frozenset) => match argtuple.pop().map(|v| self.resolve(v)).transpose()? {
            Some(Value::List(items)) => {
                self.stack.push(Value::FrozenSet(items));
                Ok(())
            }
            _ => self.error(ErrorCode::InvalidValue("frozenset() arg".into())),
        },
        Value::Global(Global::Bytearray) => {
            // Only the first argument is looked at.
            argtuple.truncate(1);
            match argtuple.pop().map(|v| self.resolve(v)).transpose()? {
                Some(Value::Bytes(bytes)) => {
                    self.stack.push(Value::Bytes(bytes));
                    Ok(())
                }
                Some(Value::String(string)) => {
                    // Each char is reduced to the low byte of its code
                    // point.
                    self.stack
                        .push(Value::Bytes(string.chars().map(|ch| ch as u32 as u8).collect()));
                    Ok(())
                }
                _ => self.error(ErrorCode::InvalidValue("bytearray() arg".into())),
            }
        }
        // list(list) -> the same list
        Value::Global(Global::List) => match argtuple.pop().map(|v| self.resolve(v)).transpose()? {
            Some(Value::List(items)) => {
                self.stack.push(Value::List(items));
                Ok(())
            }
            _ => self.error(ErrorCode::InvalidValue("list() arg".into())),
        },
        // int(big integer) -> the same integer
        Value::Global(Global::Int) => match argtuple.pop().map(|v| self.resolve(v)).transpose()? {
            Some(Value::Int(integer)) => {
                self.stack.push(Value::Int(integer));
                Ok(())
            }
            _ => self.error(ErrorCode::InvalidValue("int() arg".into())),
        },
        Value::Global(Global::Encode) => {
            // The last argument must be a string; its content is ignored.
            match argtuple.pop().map(|v| self.resolve(v)).transpose()? {
                Some(Value::String(_)) => {}
                _ => return self.error(ErrorCode::InvalidValue("encode() arg".into())),
            }
            // The argument before it is the string to encode.
            match argtuple.pop().map(|v| self.resolve(v)).transpose()? {
                Some(Value::String(s)) => {
                    // Each char is reduced to the low byte of its code
                    // point.
                    let bytes = s.chars().map(|ch| ch as u8).collect();
                    self.stack.push(Value::Bytes(bytes));
                    Ok(())
                }
                _ => self.error(ErrorCode::InvalidValue("encode() arg".into())),
            }
        }
        Value::Global(Global::Reconst) => {
            if self.options.keep_restore_state {
                // Keep the last argument, or an empty dict if there is
                // none.
                let state = match argtuple.pop().map(|v| self.resolve(v)).transpose()? {
                    Some(obj) => obj,
                    None => Value::Dict(Vec::new()),
                };
                self.stack.push(state);
            } else {
                self.stack.push(Value::Dict(Vec::new()));
            }
            Ok(())
        }
        Value::Global(Global::Other) => {
            if self.options.keep_restore_state {
                // Keep all (resolved) arguments as a tuple.
                let result: Result<_> = argtuple.into_iter().map(|v| self.resolve(v)).collect();
                self.stack.push(Value::Tuple(result?));
            } else {
                // The result stays an unresolved global.
                self.stack.push(Value::Global(Global::Other));
            }
            Ok(())
        }
        other => Self::stack_error("global reference", &other, self.pos),
    }
}
1101
1102 fn stack_error<T>(what: &'static str, value: &Value, pos: usize) -> Result<T> {
1103 let it = format!("{:?}", value);
1104 Err(Error::Eval(ErrorCode::InvalidStackTop(what, it), pos))
1105 }
1106
/// Returns an evaluation error with the given `reason` at the current
/// position.
fn error<T>(&self, reason: ErrorCode) -> Result<T> {
    Err(Error::Eval(reason, self.pos))
}
1110
1111 fn convert_value(&mut self, value: Value) -> Result<value::Value> {
1112 match value {
1113 Value::None => Ok(value::Value::None),
1114 Value::Bool(v) => Ok(value::Value::Bool(v)),
1115 Value::I64(v) => Ok(value::Value::I64(v)),
1116 Value::Int(v) => {
1117 if let Some(i) = v.to_i64() {
1118 Ok(value::Value::I64(i))
1119 } else {
1120 Ok(value::Value::Int(v))
1121 }
1122 }
1123 Value::F64(v) => Ok(value::Value::F64(v)),
1124 Value::Bytes(v) => Ok(value::Value::Bytes(v)),
1125 Value::String(v) => Ok(value::Value::String(v)),
1126 Value::List(v) => {
1127 let new = v.into_iter().map(|v| self.convert_value(v)).collect::<Result<_>>();
1128 Ok(value::Value::List(new?))
1129 }
1130 Value::Tuple(v) => {
1131 let new = v.into_iter().map(|v| self.convert_value(v)).collect::<Result<_>>();
1132 Ok(value::Value::Tuple(new?))
1133 }
1134 Value::Set(v) => {
1135 let new = v
1136 .into_iter()
1137 .map(|v| self.convert_value(v).and_then(|rv| rv.into_hashable()))
1138 .collect::<Result<_>>();
1139 Ok(value::Value::Set(new?))
1140 }
1141 Value::FrozenSet(v) => {
1142 let new = v
1143 .into_iter()
1144 .map(|v| self.convert_value(v).and_then(|rv| rv.into_hashable()))
1145 .collect::<Result<_>>();
1146 Ok(value::Value::FrozenSet(new?))
1147 }
1148 Value::Dict(v) => {
1149 let mut map = BTreeMap::new();
1150 for (key, value) in v {
1151 let real_key = self.convert_value(key).and_then(|rv| rv.into_hashable())?;
1152 let real_value = self.convert_value(value)?;
1153 map.insert(real_key, real_value);
1154 }
1155 Ok(value::Value::Dict(map))
1156 }
1157 Value::MemoRef(memo_id) => {
1158 self.resolve_recursive(memo_id, (), |slf, (), value| slf.convert_value(value))
1159 }
1160 Value::Global(_) => {
1161 if self.options.replace_unresolved_globals {
1162 Ok(value::Value::None)
1163 } else {
1164 Err(Error::Syntax(ErrorCode::UnresolvedGlobal))
1165 }
1166 }
1167 }
1168 }
1169}
1170
1171impl<'de: 'a, 'a, R: Read> de::Deserializer<'de> for &'a mut Deserializer<R> {
1172 type Error = Error;
1173
1174 fn deserialize_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
1175 let value = self.get_next_value()?;
1176 match value {
1177 Value::None => visitor.visit_unit(),
1178 Value::Bool(v) => visitor.visit_bool(v),
1179 Value::I64(v) => visitor.visit_i64(v),
1180 Value::Int(v) => {
1181 if let Some(i) = v.to_i64() {
1182 visitor.visit_i64(i)
1183 } else {
1184 Err(Error::Syntax(ErrorCode::InvalidValue("integer too large".into())))
1185 }
1186 }
1187 Value::F64(v) => visitor.visit_f64(v),
1188 Value::Bytes(v) => visitor.visit_byte_buf(v),
1189 Value::String(v) => visitor.visit_string(v),
1190 Value::List(v) => {
1191 let len = v.len();
1192 visitor.visit_seq(SeqAccess { de: self, iter: v.into_iter(), len })
1193 }
1194 Value::Tuple(v) => visitor.visit_seq(SeqAccess { len: v.len(), iter: v.into_iter(), de: self }),
1195 Value::Set(v) | Value::FrozenSet(v) => {
1196 visitor.visit_seq(SeqAccess { de: self, len: v.len(), iter: v.into_iter() })
1197 }
1198 Value::Dict(v) => {
1199 let len = v.len();
1200 visitor.visit_map(MapAccess { de: self, iter: v.into_iter(), value: None, len })
1201 }
1202 Value::MemoRef(memo_id) => self.resolve_recursive(memo_id, visitor, |slf, visitor, value| {
1203 slf.value = Some(value);
1204 slf.deserialize_any(visitor)
1205 }),
1206 Value::Global(_) => {
1207 if self.options.replace_unresolved_globals {
1208 visitor.visit_unit()
1209 } else {
1210 Err(Error::Syntax(ErrorCode::UnresolvedGlobal))
1211 }
1212 }
1213 }
1214 }
1215
1216 #[inline]
1217 fn deserialize_option<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
1218 let value = self.get_next_value()?;
1219 match value {
1220 Value::None => visitor.visit_none(),
1221 _ => {
1222 self.value = Some(value);
1223 visitor.visit_some(self)
1224 }
1225 }
1226 }
1227
1228 #[inline]
1229 fn deserialize_newtype_struct<V: Visitor<'de>>(
1230 self, _name: &'static str, visitor: V,
1231 ) -> Result<V::Value> {
1232 visitor.visit_newtype_struct(self)
1233 }
1234
1235 #[inline]
1236 fn deserialize_enum<V: Visitor<'de>>(
1237 self, _name: &'static str, _variants: &'static [&'static str], visitor: V,
1238 ) -> Result<V::Value> {
1239 visitor.visit_enum(VariantAccess { de: self })
1240 }
1241
1242 forward_to_deserialize_any! {
1243 bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
1244 bytes byte_buf map tuple_struct struct identifier
1245 tuple ignored_any unit_struct
1246 }
1247}
1248
/// Enum access helper: identifies the variant from the next value and then
/// deserializes the variant's content from the deserializer's pending
/// value.
struct VariantAccess<'a, R: Read + 'a> {
    de: &'a mut Deserializer<R>,
}

impl<'de: 'a, 'a, R: Read + 'a> de::EnumAccess<'de> for VariantAccess<'a, R> {
    type Error = Error;
    type Variant = Self;

    /// Deserializes the variant identifier and leaves the variant's content
    /// (if any) as the deserializer's pending value.
    ///
    /// Accepted representations:
    /// * a tuple: with exactly two items they are the name and the content;
    ///   otherwise only the last item (if any) is used as the name;
    /// * a dict with exactly one entry, name -> content;
    /// * a string, which is the name;
    /// * a memo reference to one of the above.
    ///
    /// Anything else fails with a `Structure` error.
    fn variant_seed<V: de::DeserializeSeed<'de>>(self, seed: V) -> Result<(V::Value, Self)> {
        let value = self.de.get_next_value()?;
        match value {
            Value::Tuple(mut v) => {
                if v.len() == 2 {
                    // Popping from the back yields the content first, then
                    // the name.
                    let args = v.pop();
                    self.de.value = v.pop();
                    let val = seed.deserialize(&mut *self.de)?;
                    self.de.value = args;
                    Ok((val, self))
                } else {
                    self.de.value = v.pop();
                    let val = seed.deserialize(&mut *self.de)?;
                    Ok((val, self))
                }
            }
            Value::Dict(mut v) => {
                if v.len() != 1 {
                    Err(Error::Syntax(ErrorCode::Structure(
                        "enum variants must have one dict entry".into(),
                    )))
                } else {
                    // Cannot fail: the length was checked to be 1.
                    let (name, args) = v.pop().unwrap();
                    self.de.value = Some(name);
                    let val = seed.deserialize(&mut *self.de)?;
                    self.de.value = Some(args);
                    Ok((val, self))
                }
            }
            Value::MemoRef(memo_id) => {
                // Make the memoized value the pending one, then retry.
                self.de.resolve_recursive(memo_id, (), |slf, (), value| {
                    slf.value = Some(value);
                    Ok(())
                })?;
                self.variant_seed(seed)
            }
            s @ Value::String(_) => {
                self.de.value = Some(s);
                let val = seed.deserialize(&mut *self.de)?;
                Ok((val, self))
            }
            _ => Err(Error::Syntax(ErrorCode::Structure(
                "enums must be represented as dicts or tuples".into(),
            ))),
        }
    }
}

impl<'de: 'a, 'a, R: Read + 'a> de::VariantAccess<'de> for VariantAccess<'a, R> {
    type Error = Error;

    /// Unit variants carry no content; nothing is consumed.
    fn unit_variant(self) -> Result<()> {
        Ok(())
    }

    /// Deserializes the variant's content with `seed`.
    fn newtype_variant_seed<T: de::DeserializeSeed<'de>>(self, seed: T) -> Result<T::Value> {
        seed.deserialize(self.de)
    }

    /// Deserializes the variant's content through `deserialize_any`; the
    /// length hint is ignored.
    fn tuple_variant<V: Visitor<'de>>(self, _len: usize, visitor: V) -> Result<V::Value> {
        de::Deserializer::deserialize_any(self.de, visitor)
    }

    /// Deserializes the variant's content through `deserialize_any`; the
    /// field names are ignored.
    fn struct_variant<V: Visitor<'de>>(
        self, _fields: &'static [&'static str], visitor: V,
    ) -> Result<V::Value> {
        de::Deserializer::deserialize_any(self.de, visitor)
    }
}
1327
1328struct SeqAccess<'a, R: Read + 'a> {
1329 de: &'a mut Deserializer<R>,
1330 iter: vec::IntoIter<Value>,
1331 len: usize,
1332}
1333
1334impl<'de: 'a, 'a, R: Read> de::SeqAccess<'de> for SeqAccess<'a, R> {
1335 type Error = Error;
1336
1337 fn next_element_seed<T: de::DeserializeSeed<'de>>(&mut self, seed: T) -> Result<Option<T::Value>> {
1338 match self.iter.next() {
1339 Some(value) => {
1340 self.len -= 1;
1341 self.de.value = Some(value);
1342 Ok(Some(seed.deserialize(&mut *self.de)?))
1343 }
1344 None => Ok(None),
1345 }
1346 }
1347
1348 fn size_hint(&self) -> Option<usize> {
1349 Some(self.len)
1350 }
1351}
1352
1353struct MapAccess<'a, R: Read + 'a> {
1354 de: &'a mut Deserializer<R>,
1355 iter: vec::IntoIter<(Value, Value)>,
1356 value: Option<Value>,
1357 len: usize,
1358}
1359
1360impl<'de: 'a, 'a, R: Read> de::MapAccess<'de> for MapAccess<'a, R> {
1361 type Error = Error;
1362
1363 fn next_key_seed<T: de::DeserializeSeed<'de>>(&mut self, seed: T) -> Result<Option<T::Value>> {
1364 match self.iter.next() {
1365 Some((key, value)) => {
1366 self.len -= 1;
1367 self.value = Some(value);
1368 self.de.value = Some(key);
1369 Ok(Some(seed.deserialize(&mut *self.de)?))
1370 }
1371 None => Ok(None),
1372 }
1373 }
1374
1375 fn next_value_seed<T: de::DeserializeSeed<'de>>(&mut self, seed: T) -> Result<T::Value> {
1376 let value = self.value.take().unwrap();
1377 self.de.value = Some(value);
1378 seed.deserialize(&mut *self.de)
1379 }
1380
1381 fn size_hint(&self) -> Option<usize> {
1382 Some(self.len)
1383 }
1384}
1385
1386pub fn from_reader<'de, R: io::Read, T: de::Deserialize<'de>>(rdr: R, options: DeOptions) -> Result<T> {
1388 let mut de = Deserializer::new(rdr, options);
1389 let value = de::Deserialize::deserialize(&mut de)?;
1390 de.end()?;
1392 Ok(value)
1393}
1394
/// Deserializes a `T` from pickle data held in a byte slice; see
/// `from_reader`.
pub fn from_slice<'de, T: de::Deserialize<'de>>(v: &[u8], options: DeOptions) -> Result<T> {
    from_reader(io::Cursor::new(v), options)
}
1399
/// Deserializes a `T` from pickle data produced by a fused iterator, which
/// is adapted to a reader via `IterRead`; see `from_reader`.
pub fn from_iter<'de, E, I, T>(it: I, options: DeOptions) -> Result<T>
where
    E: IterReadItem,
    I: FusedIterator<Item = E>,
    T: de::Deserialize<'de>,
{
    from_reader(IterRead::new(it), options)
}
1409
1410pub fn value_from_reader<R: io::Read>(rdr: R, options: DeOptions) -> Result<value::Value> {
1412 let mut de = Deserializer::new(rdr, options);
1413 let value = de.deserialize_value()?;
1414 de.end()?;
1415 Ok(value)
1416}
1417
/// Decodes pickle data held in a byte slice into a generic `value::Value`;
/// see `value_from_reader`.
pub fn value_from_slice(v: &[u8], options: DeOptions) -> Result<value::Value> {
    value_from_reader(io::Cursor::new(v), options)
}
1422
/// Decodes pickle data produced by a fused iterator (adapted to a reader
/// via `IterRead`) into a generic `value::Value`; see `value_from_reader`.
pub fn value_from_iter<E, I>(it: I, options: DeOptions) -> Result<value::Value>
where
    E: IterReadItem,
    I: FusedIterator<Item = E>,
{
    value_from_reader(IterRead::new(it), options)
}