torrust_serde_bencode/
de.rs

1//! Deserialize bencode data to a Rust data structure
2
3use crate::error::{Error, Result};
4use serde::{
5    de::{self, Error as _, Unexpected},
6    forward_to_deserialize_any,
7};
8use std::io::Read;
9use std::str;
10
/// Cursor over the contents of a bencode container (list or dictionary).
///
/// It borrows the [`Deserializer`] mutably and pulls tokens from it while serde's
/// sequence, map and enum access traits walk the container.
#[doc(hidden)]
// todo: This should be pub(crate).
pub struct BencodeAccess<'a, R: 'a + Read> {
    /// Deserializer whose token stream is being consumed.
    de: &'a mut Deserializer<R>,
    /// Number of elements still expected when the container has a fixed length, or `None`
    /// when it simply runs until its closing `e`.
    len: Option<usize>,
}
17
18impl<'a, R: 'a + Read> BencodeAccess<'a, R> {
19    fn new(de: &'a mut Deserializer<R>, len: Option<usize>) -> BencodeAccess<'a, R> {
20        BencodeAccess { de, len }
21    }
22}
23
24impl<'de, 'a, R: 'a + Read> de::SeqAccess<'de> for BencodeAccess<'a, R> {
25    type Error = Error;
26
27    fn next_element_seed<T: de::DeserializeSeed<'de>>(
28        &mut self,
29        seed: T,
30    ) -> Result<Option<T::Value>> {
31        let res = match self.de.parse()? {
32            ParseResult::End => Ok(None),
33            r => {
34                self.de.next = Some(r);
35                Ok(Some(seed.deserialize(&mut *self.de)?))
36            }
37        };
38        if let Some(l) = self.len {
39            let l = l - 1;
40            self.len = Some(l);
41            if l == 0 && ParseResult::End != self.de.parse()? {
42                return Err(Error::InvalidType("expected `e`".to_string()));
43            }
44        }
45        res
46    }
47}
48
49impl<'de, 'a, R: 'a + Read> de::MapAccess<'de> for BencodeAccess<'a, R> {
50    type Error = Error;
51    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
52    where
53        K: de::DeserializeSeed<'de>,
54    {
55        match self.de.parse()? {
56            ParseResult::End => Ok(None),
57            r => {
58                self.de.next = Some(r);
59                Ok(Some(seed.deserialize(&mut *self.de)?))
60            }
61        }
62    }
63
64    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
65    where
66        V: de::DeserializeSeed<'de>,
67    {
68        seed.deserialize(&mut *self.de)
69    }
70}
71
72impl<'de, 'a, R: 'a + Read> de::VariantAccess<'de> for BencodeAccess<'a, R> {
73    type Error = Error;
74
75    fn unit_variant(self) -> Result<()> {
76        Ok(())
77    }
78
79    fn newtype_variant_seed<T: de::DeserializeSeed<'de>>(self, seed: T) -> Result<T::Value> {
80        let res = seed.deserialize(&mut *self.de)?;
81        if ParseResult::End != self.de.parse()? {
82            return Err(Error::InvalidType("expected `e`".to_string()));
83        }
84        Ok(res)
85    }
86
87    fn tuple_variant<V: de::Visitor<'de>>(self, len: usize, visitor: V) -> Result<V::Value> {
88        let res = match self.de.parse()? {
89            ParseResult::List => visitor.visit_seq(BencodeAccess::new(&mut *self.de, Some(len)))?,
90            _ => return Err(Error::InvalidType("expected list".to_string())),
91        };
92        if ParseResult::End != self.de.parse()? {
93            return Err(Error::InvalidType("expected `e`".to_string()));
94        }
95        Ok(res)
96    }
97
98    fn struct_variant<V: de::Visitor<'de>>(
99        self,
100        _: &'static [&'static str],
101        visitor: V,
102    ) -> Result<V::Value> {
103        let res = de::Deserializer::deserialize_any(&mut *self.de, visitor)?;
104        if ParseResult::End != self.de.parse()? {
105            return Err(Error::InvalidType("expected `e`".to_string()));
106        }
107        Ok(res)
108    }
109}
110
111impl<'de, 'a, R: 'a + Read> de::EnumAccess<'de> for BencodeAccess<'a, R> {
112    type Error = Error;
113    type Variant = Self;
114    fn variant_seed<V: de::DeserializeSeed<'de>>(self, seed: V) -> Result<(V::Value, Self)> {
115        match self.de.parse()? {
116            t @ ParseResult::Bytes(_) => {
117                self.de.next = Some(t);
118                Ok((seed.deserialize(&mut *self.de)?, self))
119            }
120            ParseResult::Map => Ok((seed.deserialize(&mut *self.de)?, self)),
121            t => Err(Error::InvalidValue(format!(
122                "Expected bytes or map; got `{:?}`",
123                t
124            ))),
125        }
126    }
127}
128
/// A single token produced by the tokenizer in `Deserializer::parse`.
///
/// Integers and byte strings are complete values; lists and dictionaries are reported
/// only as start/end markers, with their contents arriving as subsequent tokens.
#[derive(Debug, Eq, PartialEq)]
enum ParseResult {
    /// A complete integer (`i…e`).
    Int(i64),
    /// A complete length-prefixed byte string (`<len>:<bytes>`).
    Bytes(Vec<u8>),
    /// list start
    List,
    /// map start
    Map,
    /// list or map end
    End,
}
140
141impl ParseResult {
142    fn to_unexpected_error(&self, expected: &str) -> Error {
143        match self {
144            Self::Int(i) => Error::invalid_type(Unexpected::Signed(*i), &expected),
145            Self::Bytes(bytes) => Error::invalid_type(Unexpected::Bytes(bytes), &expected),
146            Self::List => Error::invalid_type(Unexpected::Seq, &expected),
147            Self::Map => Error::invalid_type(Unexpected::Map, &expected),
148            Self::End => Error::custom(format_args!("unexpected end, expected {}", expected)),
149        }
150    }
151}
152
/// A structure for deserializing bencode into Rust values.
#[derive(Debug)]
pub struct Deserializer<R: Read> {
    /// Source of the raw bencode bytes.
    reader: R,
    /// One-token lookahead: a token that was parsed and then handed back, so the next
    /// `parse` call returns it instead of reading from `reader`.
    next: Option<ParseResult>,
}
159
160impl<'de, R: Read> Deserializer<R> {
161    /// Create a new deserializer.
162    pub fn new(reader: R) -> Deserializer<R> {
163        Deserializer { reader, next: None }
164    }
165
166    fn parse_int(&mut self) -> Result<i64> {
167        let mut buf = [0; 1];
168        let mut result = Vec::new();
169        loop {
170            if 1 != self.reader.read(&mut buf).map_err(Error::IoError)? {
171                return Err(Error::EndOfStream);
172            }
173            match buf[0] {
174                b'e' => {
175                    let len_str = String::from_utf8(result).map_err(|_| {
176                        Error::InvalidValue("Non UTF-8 integer encoding".to_string())
177                    })?;
178                    let len_int = len_str.parse().map_err(|_| {
179                        Error::InvalidValue(format!("Can't parse `{}` as integer", len_str))
180                    })?;
181                    return Ok(len_int);
182                }
183                n => result.push(n),
184            }
185        }
186    }
187
188    fn parse_bytes_len(&mut self, len_char: u8) -> Result<usize> {
189        let mut buf = [0; 1];
190        let mut len = Vec::new();
191        len.push(len_char);
192        loop {
193            if 1 != self.reader.read(&mut buf).map_err(Error::IoError)? {
194                return Err(Error::EndOfStream);
195            }
196            match buf[0] {
197                b':' => {
198                    let len_str = String::from_utf8(len).map_err(|_| {
199                        Error::InvalidValue("Non UTF-8 integer encoding".to_string())
200                    })?;
201                    let len_int = len_str.parse().map_err(|_| {
202                        Error::InvalidValue(format!("Can't parse `{}` as string length", len_str))
203                    })?;
204                    return Ok(len_int);
205                }
206                n => len.push(n),
207            }
208        }
209    }
210
211    fn parse_bytes(&mut self, len_char: u8) -> Result<Vec<u8>> {
212        let len = self.parse_bytes_len(len_char)?;
213        let mut buf = vec![0u8; len];
214        let actual_len = self
215            .reader
216            .read(buf.as_mut_slice())
217            .map_err(Error::IoError)?;
218        if len != actual_len {
219            return Err(Error::EndOfStream);
220        }
221        Ok(buf)
222    }
223
224    fn parse(&mut self) -> Result<ParseResult> {
225        if let Some(t) = self.next.take() {
226            return Ok(t);
227        }
228        let mut buf = [0; 1];
229        if 1 != self.reader.read(&mut buf).map_err(Error::IoError)? {
230            return Err(Error::EndOfStream);
231        }
232        match buf[0] {
233            b'i' => Ok(ParseResult::Int(self.parse_int()?)),
234            n @ b'0'..=b'9' => Ok(ParseResult::Bytes(self.parse_bytes(n)?)),
235            b'l' => Ok(ParseResult::List),
236            b'd' => Ok(ParseResult::Map),
237            b'e' => Ok(ParseResult::End),
238            c => Err(Error::InvalidValue(format!(
239                "Invalid character `{}`",
240                c as char
241            ))),
242        }
243    }
244}
245
246impl<'de, 'a, R: Read> de::Deserializer<'de> for &'a mut Deserializer<R> {
247    type Error = Error;
248
249    #[inline]
250    fn deserialize_any<V: de::Visitor<'de>>(mut self, visitor: V) -> Result<V::Value> {
251        match self.parse()? {
252            ParseResult::Int(i) => visitor.visit_i64(i),
253            ParseResult::Bytes(s) => visitor.visit_bytes(s.as_ref()),
254            ParseResult::List => visitor.visit_seq(BencodeAccess::new(&mut self, None)),
255            ParseResult::Map => visitor.visit_map(BencodeAccess::new(&mut self, None)),
256            ParseResult::End => Err(Error::EndOfStream),
257        }
258    }
259
260    forward_to_deserialize_any! {
261        bool char i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 unit bytes byte_buf seq map unit_struct
262        tuple_struct ignored_any struct
263    }
264
265    #[inline]
266    fn deserialize_newtype_struct<V: de::Visitor<'de>>(
267        self,
268        _name: &'static str,
269        visitor: V,
270    ) -> Result<V::Value> {
271        visitor.visit_newtype_struct(self)
272    }
273
274    #[inline]
275    fn deserialize_option<V: de::Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
276        visitor.visit_some(self)
277    }
278
279    #[inline]
280    fn deserialize_enum<V>(
281        self,
282        _name: &str,
283        _variants: &'static [&'static str],
284        visitor: V,
285    ) -> Result<V::Value>
286    where
287        V: de::Visitor<'de>,
288    {
289        visitor.visit_enum(BencodeAccess::new(self, None))
290    }
291
292    // Do not delegate this to `deserialize_any` because we want to call `visit_str` instead of
293    // `visit_bytes` on the visitor, to correctly support adjacently tagged enums (the tag is
294    // parsed as str, not bytes).
295    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value>
296    where
297        V: de::Visitor<'de>,
298    {
299        let bytes = self.parse().and_then(|r| match r {
300            ParseResult::Bytes(bytes) => Ok(bytes),
301            _ => Err(r.to_unexpected_error("bytes")),
302        })?;
303
304        let s = str::from_utf8(&bytes)
305            .map_err(|_| Error::invalid_value(Unexpected::Bytes(&bytes), &"utf-8 string"))?;
306        visitor.visit_str(s)
307    }
308
309    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
310    where
311        V: de::Visitor<'de>,
312    {
313        self.deserialize_str(visitor)
314    }
315
316    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value>
317    where
318        V: de::Visitor<'de>,
319    {
320        self.deserialize_str(visitor)
321    }
322
323    fn deserialize_tuple<V>(self, size: usize, visitor: V) -> Result<V::Value>
324    where
325        V: de::Visitor<'de>,
326    {
327        self.parse().and_then(|r| match r {
328            ParseResult::List => Ok(()),
329            _ => Err(r.to_unexpected_error("list")),
330        })?;
331
332        visitor.visit_seq(BencodeAccess::new(self, Some(size)))
333    }
334}
335
336/// Deserialize an instance of type `T` from a string of bencode.
337///
338/// # Examples
339/// ```
340/// # fn main() -> Result<(), serde_bencode::Error> {
341/// use serde_derive::{Serialize, Deserialize};
342///
343/// #[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
344/// struct Address {
345///     street: String,
346///     city: String,
347/// }
348///
349/// let encoded = "d4:city18:Duckburg, Calisota6:street17:1313 Webfoot Walke".to_string();
350/// let decoded: Address = serde_bencode::from_str(&encoded)?;
351///
352/// assert_eq!(
353///     decoded,
354///     Address {
355///         street: "1313 Webfoot Walk".to_string(),
356///         city: "Duckburg, Calisota".to_string(),
357///     }
358/// );
359/// # Ok(())
360/// # }
361/// ```
362///
363/// # Errors
364///
365/// This conversion can fail if the input bencode is improperly formatted or if the structure of
366/// the input does not match the structure expected by `T`. It can also fail if `T`'s
367/// implementation of `Deserialize` decides to fail.
368pub fn from_str<'de, T>(s: &'de str) -> Result<T>
369where
370    T: de::Deserialize<'de>,
371{
372    from_bytes(s.as_bytes())
373}
374
375/// Deserialize an instance of type `T` from a bencode byte vector.
376///
377/// # Examples
378/// ```
379/// # fn main() -> Result<(), serde_bencode::Error> {
380/// use serde_derive::{Serialize, Deserialize};
381///
382/// #[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
383/// struct Address {
384///     street: String,
385///     city: String,
386/// }
387///
388/// let encoded = "d4:city18:Duckburg, Calisota6:street17:1313 Webfoot Walke".as_bytes();
389/// let decoded: Address = serde_bencode::from_bytes(&encoded)?;
390///
391/// assert_eq!(
392///     decoded,
393///     Address {
394///         street: "1313 Webfoot Walk".to_string(),
395///         city: "Duckburg, Calisota".to_string(),
396///     }
397/// );
398/// # Ok(())
399/// # }
400/// ```
401///
402/// # Errors
403///
404/// This conversion can fail if the input bencode is improperly formatted or if the structure of
405/// the input does not match the structure expected by `T`. It can also fail if `T`'s
406/// implementation of `Deserialize` decides to fail.
407pub fn from_bytes<'de, T>(b: &'de [u8]) -> Result<T>
408where
409    T: de::Deserialize<'de>,
410{
411    de::Deserialize::deserialize(&mut Deserializer::new(b))
412}