pipe_chain/parser/
bencode.rs

1//! Bencode parsing module
2use crate::{
3    byte::{ascii_digits, take, TagByteError, TagBytesError},
4    tag, AndExt, AndThenExt, EitherExt, Incomplete, MapExt, OptExt, OrExt, Pipe,
5    Result as PResult, UnpackExt,
6};
7use either::Either;
8use fatal_error::FatalError;
9use std::{
10    collections::BTreeMap, fmt::Debug, num::ParseIntError, str::from_utf8_unchecked,
11};
12
13/// Bencode parsing error
14#[derive(Debug, Clone, PartialEq, Eq)]
15pub enum Error {
16    /// parse int error
17    Int(ParseIntError),
18    /// Invalid number
19    InvalidNumber,
20    /// Invalid string size
21    InvalidStringSize,
22    /// more bytes needed
23    Incomplete(Incomplete),
24    /// parse max depth reached
25    MaxDepth,
26    /// invalid dict key
27    InvalidKey(Bencode),
28    /// dick has a key but no value
29    DictNoValue,
30    /// tag error
31    TagByte(TagByteError),
32    /// tag error
33    TagBytes(TagBytesError),
34}
35
36impl From<ParseIntError> for Error {
37    fn from(value: ParseIntError) -> Self { Error::Int(value) }
38}
39
40impl From<Incomplete> for Error {
41    fn from(value: Incomplete) -> Self { Error::Incomplete(value) }
42}
43
44impl From<TagByteError> for Error {
45    fn from(value: TagByteError) -> Self { Error::TagByte(value) }
46}
47impl From<TagBytesError> for Error {
48    fn from(value: TagBytesError) -> Self { Error::TagBytes(value) }
49}
50
51impl std::fmt::Display for Error {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        match self {
54            Error::Int(x) => write!(f, "Invalid i64: {x}"),
55            Error::InvalidNumber => write!(f, "Invalid number"),
56            Error::InvalidStringSize => write!(f, "Invalid string size"),
57            Error::Incomplete(x) => Debug::fmt(x, f),
58            Error::MaxDepth => write!(f, "Max depth reached"),
59            Error::InvalidKey(x) => write!(f, "Invalid dict key: {x:?}"),
60            Error::DictNoValue => write!(f, "Missing dict value"),
61            Error::TagByte(x) => write!(f, "Bencode: {x}",),
62            Error::TagBytes(x) => write!(f, "Bencode: {x}",),
63        }
64    }
65}
66
67impl std::error::Error for Error {
68    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
69        match self {
70            Error::Int(x) => Some(x),
71            Error::Incomplete(x) => Some(x),
72            _ => None,
73        }
74    }
75}
76
77/// Bencode element
78#[derive(Debug, Clone, PartialEq, Eq)]
79pub enum Bencode {
80    /// Bencode integer
81    Integer(i64),
82    /// Bencode string
83    String(Vec<u8>),
84    /// Bencode list
85    List(Vec<Bencode>),
86    /// Bencode dict
87    Dict(BTreeMap<Vec<u8>, Bencode>),
88}
89
90fn integer<'a>(x: &'a [u8]) -> PResult<&'a [u8], (i64,), Error> {
91    tag(b'i')
92        .and_other(tag(b'-').opt().map1(|x: Option<_>| x.is_some()))
93        .and(ascii_digits(1..))
94        .and_self(tag(b'e'))
95        .and_then(|x, y: &'a [u8]| {
96            if (x && y.starts_with(b"0")) || y.starts_with(b"00") {
97                Err(FatalError::Fatal(Error::InvalidNumber))
98            } else {
99                format!(
100                    "{}{}",
101                    if x { "-" } else { "" },
102                    //safe because we read the digits just before
103                    unsafe { from_utf8_unchecked(y) }
104                )
105                .parse::<i64>()
106                .map(|x| (x,))
107                .map_err(|x| FatalError::Fatal(x.into()))
108            }
109        })
110        .apply(x)
111}
112
113fn usize<'a>(x: &'a [u8]) -> PResult<&'a [u8], (usize,), Error> {
114    ascii_digits(1..)
115        .and_then(|nb: &'a [u8]| match nb {
116            b"0" => Ok((0,)),
117            x if x.starts_with(b"0") => Err(FatalError::Fatal(Error::InvalidStringSize)),
118            x => Ok((unsafe { from_utf8_unchecked(x) }
119                .parse::<usize>()
120                .map_err(|x| FatalError::Fatal(Error::Int(x)))?,)),
121        })
122        .apply(x)
123}
124
125fn string(x: &[u8]) -> PResult<&[u8], (Vec<u8>,), Error> {
126    usize
127        .and_self(tag(b':'))
128        .unpack()
129        .ok_and_then(|i, nb| match nb {
130            0 => Ok((i, (Vec::new(),))),
131            x => take(x).map1(|x: &[u8]| x.to_vec()).apply(i),
132        })
133        .apply(x)
134}
135
136fn single_item(x: &[u8]) -> PResult<&[u8], (Bencode,), Error> {
137    integer.map1(Bencode::Integer).or(string.map1(Bencode::String)).apply(x)
138}
139
140#[derive(Debug, Default)]
141struct DictState {
142    data: BTreeMap<Vec<u8>, Bencode>,
143    key:  Option<Vec<u8>>,
144}
145
146impl DictState {
147    fn push_item(&mut self, item: Bencode) -> Result<(), FatalError<Error>> {
148        match (self.key.take(), item) {
149            (None, Bencode::String(x)) => {
150                self.key = Some(x);
151                Ok(())
152            }
153            (Some(k), v @ Bencode::Integer(_)) => {
154                self.data.insert(k, v);
155                Ok(())
156            }
157            (Some(k), v @ Bencode::String(_)) => {
158                self.data.insert(k, v);
159                Ok(())
160            }
161            (Some(k), v @ Bencode::List(_)) => {
162                self.data.insert(k, v);
163                Ok(())
164            }
165            (Some(k), v @ Bencode::Dict(_)) => {
166                self.data.insert(k, v);
167                Ok(())
168            }
169            (None, x) => Err(FatalError::Fatal(Error::InvalidKey(x))),
170        }
171    }
172
173    fn finish(self) -> Result<Bencode, FatalError<Error>> {
174        if self.key.is_none() {
175            Ok(Bencode::Dict(self.data))
176        } else {
177            Err(FatalError::Fatal(Error::DictNoValue))
178        }
179    }
180}
181
182enum ListDict {
183    List(Vec<Bencode>),
184    Dict(DictState),
185}
186
187impl ListDict {
188    fn push_item(&mut self, item: Bencode) -> Result<(), FatalError<Error>> {
189        match self {
190            ListDict::List(x) => {
191                x.push(item);
192                Ok(())
193            }
194            ListDict::Dict(x) => x.push_item(item),
195        }
196    }
197
198    fn finish(self) -> Result<Bencode, FatalError<Error>> {
199        match self {
200            ListDict::List(x) => Ok(Bencode::List(x)),
201            ListDict::Dict(x) => x.finish(),
202        }
203    }
204}
205
206fn list_start(i: &[u8]) -> PResult<&[u8], (ListDict,), Error> {
207    tag(b'l').map1(|_| ListDict::List(vec![])).apply(i)
208}
209
210fn dict_start(i: &[u8]) -> PResult<&[u8], (ListDict,), Error> {
211    tag(b'd').map1(|_| ListDict::Dict(DictState::default())).apply(i)
212}
213
214fn single_dict_list(i: &[u8]) -> PResult<&[u8], (Either<Bencode, ListDict>,), Error> {
215    single_item.either(dict_start.or(list_start)).apply(i)
216}
217
218#[inline]
219fn bencode_impl(max_depth: usize, i: &[u8]) -> PResult<&[u8], (Bencode,), Error> {
220    match single_dict_list.unpack().apply(i)? {
221        (i, Either::Left(x)) => Ok((i, (x,))),
222        (mut i, Either::Right(x)) => {
223            let mut stack = vec![x];
224            loop {
225                if stack.len() > max_depth {
226                    return Err(FatalError::Fatal(Error::MaxDepth));
227                }
228                let ln = i.len();
229                if let (r, Some(_)) = tag(b'e').opt().unpack().apply(i)? {
230                    let x = stack.pop().unwrap().finish()?;
231                    if let Some(y) = stack.last_mut() {
232                        y.push_item(x)?;
233                    } else {
234                        return Ok((r, (x,)));
235                    }
236                    i = r
237                }
238                if let (r, Some(x)) = single_dict_list.opt().unpack().apply(i)? {
239                    match x {
240                        Either::Left(x) => stack.last_mut().unwrap().push_item(x)?,
241                        Either::Right(x) => stack.push(x),
242                    }
243                    i = r;
244                }
245                if ln == i.len() {
246                    break;
247                }
248            }
249            if stack.len() == 1 {
250                Ok((i, (stack.pop().unwrap().finish()?,)))
251            } else {
252                Err(FatalError::Fatal(Error::Incomplete(Incomplete::Unknown)))
253            }
254        }
255    }
256}
257
258/// parse a buffer containing bencode
259///
260/// if the bencode tree contains too much nested elements, a stackoverflow may occur on debug and deallocation
261pub fn bencode(i: &[u8]) -> PResult<&[u8], (Bencode,), Error> {
262    bencode_impl(usize::MAX, i)
263}
264
265/// parse a buffer containing bencode
266///
267/// the function stops parsing bencode if more than max_depth nested items are encountered
268pub fn bencode_limited(max_depth: usize, i: &[u8]) -> PResult<&[u8], (Bencode,), Error> {
269    bencode_impl(max_depth, i)
270}
271
272#[cfg(test)]
273mod tests {
274    use super::*;
275
276    #[test]
277    fn test_integer() {
278        let mut p = integer;
279        assert_eq!(p.apply(b"i42e"), Ok((&b""[..], (42i64,))));
280        assert_eq!(p.apply(b"i-42e"), Ok((&b""[..], (-42i64,))));
281        assert_eq!(p.apply(b"i-0e"), Err(FatalError::Fatal(Error::InvalidNumber)));
282        assert_eq!(p.apply(b"i0e"), Ok((&b""[..], (0i64,))));
283        assert_eq!(p.apply(b"i0004e"), Err(FatalError::Fatal(Error::InvalidNumber)));
284        let max = format!("i{}e", i64::MAX);
285        let min = format!("i{}e", i64::MIN);
286        assert_eq!(p.apply(max.as_bytes()), Ok((&b""[..], (i64::MAX,))));
287        assert_eq!(p.apply(min.as_bytes()), Ok((&b""[..], (i64::MIN,))));
288        p.apply(b"i0").expect_err("should fail");
289    }
290
291    #[test]
292    fn test_string() {
293        assert_eq!(string.apply(b"4:spam"), Ok((&b""[..], (b"spam".to_vec(),))));
294        assert_eq!(string.apply(b"3:cowd"), Ok((&b"d"[..], (b"cow".to_vec(),))));
295        assert_eq!(string.apply(b"0:cowd"), Ok((&b"cowd"[..], (b"".to_vec(),))));
296        assert_eq!(string.apply(b"0:"), Ok((&b""[..], (b"".to_vec(),))));
297    }
298
299    #[test]
300    fn test_list() {
301        assert_eq!(
302            bencode(b"l4:spam4:eggse"),
303            Ok((
304                &b""[..],
305                (Bencode::List(vec![
306                    Bencode::String(b"spam".to_vec()),
307                    Bencode::String(b"eggs".to_vec())
308                ]),)
309            ))
310        );
311        assert_eq!(
312            bencode(b"li10ei12ee"),
313            Ok((
314                &b""[..],
315                (Bencode::List(vec![Bencode::Integer(10), Bencode::Integer(12)]),)
316            ))
317        );
318        assert_eq!(
319            bencode(b"l4:spami10e3:cowi12ee"),
320            Ok((
321                &b""[..],
322                (Bencode::List(vec![
323                    Bencode::String(b"spam".to_vec()),
324                    Bencode::Integer(10),
325                    Bencode::String(b"cow".to_vec()),
326                    Bencode::Integer(12)
327                ]),)
328            ))
329        );
330        assert_eq!(
331            bencode(b"l4:spami10e3:cowi12ee"),
332            Ok((
333                &b""[..],
334                (Bencode::List(vec![
335                    Bencode::String(b"spam".to_vec()),
336                    Bencode::Integer(10),
337                    Bencode::String(b"cow".to_vec()),
338                    Bencode::Integer(12)
339                ]),)
340            ))
341        );
342        assert_eq!(bencode(b"le"), Ok((&b""[..], (Bencode::List(vec![]),))));
343    }
344
345    #[test]
346    fn test_dict() {
347        assert_eq!(
348            bencode(b"d3:cow3:moo4:spam4:eggse"),
349            Ok((
350                &b""[..],
351                (Bencode::Dict(
352                    [
353                        (b"cow".to_vec(), Bencode::String(b"moo".to_vec())),
354                        (b"spam".to_vec(), Bencode::String(b"eggs".to_vec()))
355                    ]
356                    .into()
357                ),)
358            ))
359        );
360        assert_eq!(
361            bencode(b"d4:spaml1:a1:bee"),
362            Ok((
363                &b""[..],
364                (Bencode::Dict(
365                    [(
366                        b"spam".to_vec(),
367                        Bencode::List(
368                            [
369                                Bencode::String(b"a".to_vec()),
370                                Bencode::String(b"b".to_vec())
371                            ]
372                            .into()
373                        )
374                    )]
375                    .into()
376                ),)
377            ))
378        );
379
380        assert_eq!(
381            bencode(b"d9:publisher3:bob17:publisher-webpage15:www.example.com18:publisher.location4:homee"),
382            Ok((
383                &b""[..],
384                (Bencode::Dict(
385                    [
386                        (b"publisher".to_vec(), Bencode::String(b"bob".to_vec())),
387                        (b"publisher-webpage".to_vec(), Bencode::String(b"www.example.com".to_vec())),
388                        (b"publisher.location".to_vec(), Bencode::String(b"home".to_vec()))
389                    ]
390                    .into()
391                ),)
392            ))
393        );
394
395        assert_eq!(bencode(b"de"), Ok((&b""[..], (Bencode::Dict([].into()),))));
396    }
397
398    #[test]
399    fn test_bencode() {
400        let mut p = bencode;
401        assert_eq!(p.apply(b"i42e"), Ok((&b""[..], (Bencode::Integer(42i64),))));
402        assert_eq!(p.apply(b"i-42e"), Ok((&b""[..], (Bencode::Integer(-42i64),))));
403        assert_eq!(p.apply(b"i-0e"), Err(FatalError::Fatal(Error::InvalidNumber)));
404        assert_eq!(p.apply(b"i0e"), Ok((&b""[..], (Bencode::Integer(0i64),))));
405        assert_eq!(p.apply(b"i0004e"), Err(FatalError::Fatal(Error::InvalidNumber)));
406        let max = format!("i{}e", i64::MAX);
407        let min = format!("i{}e", i64::MIN);
408        assert_eq!(
409            bencode(max.as_bytes()),
410            Ok((&b""[..], (Bencode::Integer(i64::MAX),)))
411        );
412        assert_eq!(
413            bencode(min.as_bytes()),
414            Ok((&b""[..], (Bencode::Integer(i64::MIN),)))
415        );
416        assert!(p.apply(b"i0").is_err());
417    }
418
419    #[test]
420    fn test_nested() {
421        const NB: usize = 2000;
422        let buf = [b"d5:hellol"; NB]
423            .into_iter()
424            .flatten()
425            .chain([b"ee"; NB].into_iter().flatten())
426            .copied()
427            .collect::<Vec<u8>>();
428        bencode(&buf).expect("should be ok");
429    }
430
431    #[test]
432    fn test_deeply_nested() {
433        const NB: usize = 3000;
434        let mut buf = Vec::new();
435        buf.resize(NB, b'l');
436        buf.resize(NB * 2, b'e');
437        assert_eq!(bencode_limited(NB - 1, &buf), Err(FatalError::Fatal(Error::MaxDepth)))
438    }
439}