nom_bencode/
lib.rs

1//! [![Version](https://img.shields.io/crates/v/nom_bencode)](https://crates.io/crates/nom_bencode)
2//! [![Downloads](https://img.shields.io/crates/d/nom_bencode)](https://crates.io/crates/nom_bencode)
3//! [![License](https://img.shields.io/crates/l/nom_bencode)](https://crates.io/crates/nom_bencode)
4//! ![Rust](https://github.com/edg-l/nom-bencode/workflows/Rust/badge.svg)
5//! [![Docs](https://docs.rs/nom_bencode/badge.svg)](https://docs.rs/nom_bencode)
6//!
7//! A bencode parser written with nom.
8//! ```rust
9//! use nom_bencode::Value;
10//!
11//! let data = nom_bencode::parse(b"d3:cow3:moo4:spam4:eggse").unwrap();
12//! let v = data.first().unwrap();
13//!
14//! if let Value::Dictionary(dict) = v {
15//!     let v = dict.get("cow".as_bytes()).unwrap();
16//!
17//!     if let Value::Bytes(data) = v {
18//!         assert_eq!(data, b"moo");
19//!     }
20//!
21//!     let v = dict.get("spam".as_bytes()).unwrap();
22//!     if let Value::Bytes(data) = v {
23//!         assert_eq!(data, b"eggs");
24//!     }
25//! }
26//! ```
27
28#![forbid(unsafe_code)]
29#![deny(missing_docs)]
30#![deny(warnings)]
31#![deny(clippy::nursery)]
32#![deny(clippy::pedantic)]
33#![deny(clippy::all)]
34
35pub use errors::BencodeError;
36use nom::{
37    branch::alt,
38    bytes::complete::take,
39    character::complete::{char, digit1},
40    combinator::{eof, recognize},
41    multi::{many0, many_till},
42    sequence::{delimited, pair, preceded},
43    IResult,
44};
45use std::{collections::HashMap, fmt::Debug};
46
47pub mod errors;
48pub use nom::Err;
49
// Result type shared by the value parsers: on success the remaining input
// together with the parsed `Value`, on failure a `BencodeError` wrapped in `nom::Err`.
type BenResult<'a> = IResult<&'a [u8], Value<'a>, BencodeError<&'a [u8]>>;
51
/// A bencode value.
///
/// Byte strings and dictionary keys are slices with lifetime `'a`, so a value
/// cannot outlive the buffer it refers to. Values can be compared structurally
/// with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Value<'a> {
    /// A byte array.
    Bytes(&'a [u8]),
    /// An integer.
    Integer(i64),
    /// A list of other bencode values.
    List(Vec<Self>),
    /// A dictionary of other bencode values.
    Dictionary(HashMap<&'a [u8], Self>),
}
64
65impl<'a> Value<'a> {
66    fn parse_integer(start_inp: &'a [u8]) -> BenResult {
67        let (inp, value) = delimited(
68            char('i'),
69            alt((
70                recognize(pair(char('+'), digit1)),
71                recognize(pair(char('-'), digit1)),
72                digit1,
73            )),
74            char('e'),
75        )(start_inp)?;
76
77        let value_str =
78            std::str::from_utf8(value).expect("value should be a valid integer str at this point");
79
80        if value_str.starts_with("-0") || (value_str.starts_with('0') && value_str.len() > 1) {
81            Err(nom::Err::Failure(BencodeError::InvalidInteger(start_inp)))
82        } else {
83            let value_integer: i64 = value_str
84                .parse()
85                .map_err(|e| BencodeError::ParseIntError(inp, e))?;
86            Ok((inp, Value::Integer(value_integer)))
87        }
88    }
89
90    fn parse_bytes(start_inp: &'a [u8]) -> BenResult<'a> {
91        let (inp, length) = digit1(start_inp)?;
92
93        let (inp, _) = char(':')(inp)?;
94
95        let length = std::str::from_utf8(length)
96            .expect("length should be a valid integer str at this point");
97
98        let length: u64 = length
99            .parse()
100            .map_err(|e| BencodeError::ParseIntError(inp, e))?;
101
102        if length == 0 {
103            Err(BencodeError::InvalidBytesLength(start_inp))?;
104        }
105
106        let (inp, characters) = take(length)(inp)?;
107
108        Ok((inp, Value::Bytes(characters)))
109    }
110
111    fn parse_list(start_inp: &'a [u8]) -> BenResult<'a> {
112        let (inp, value) = preceded(
113            char('l'),
114            many_till(
115                alt((
116                    Self::parse_bytes,
117                    Self::parse_integer,
118                    Self::parse_list,
119                    Self::parse_dict,
120                )),
121                char('e'),
122            ),
123        )(start_inp)?;
124
125        Ok((inp, Value::List(value.0)))
126    }
127
128    fn parse_dict(start_inp: &'a [u8]) -> BenResult<'a> {
129        let (inp, value) = preceded(
130            char('d'),
131            many_till(
132                pair(
133                    Self::parse_bytes,
134                    alt((
135                        Self::parse_bytes,
136                        Self::parse_integer,
137                        Self::parse_list,
138                        Self::parse_dict,
139                    )),
140                ),
141                char('e'),
142            ),
143        )(start_inp)?;
144
145        let data = value.0.into_iter().map(|x| {
146            // Keys are always string
147            if let Value::Bytes(key) = x.0 {
148                (key, x.1)
149            } else {
150                unreachable!()
151            }
152        });
153
154        let map = data.collect();
155
156        Ok((inp, Value::Dictionary(map)))
157    }
158}
159
160/// Parses the provided bencode `source`.
161///
162/// # Errors
163/// Returns `Err` if there was an error parsing `source`.
164pub fn parse(source: &[u8]) -> Result<Vec<Value>, Err<BencodeError<&[u8]>>> {
165    let (source2, items) = many0(alt((
166        Value::parse_bytes,
167        Value::parse_integer,
168        Value::parse_list,
169        Value::parse_dict,
170    )))(source)?;
171
172    let _ = eof(source2)?;
173
174    Ok(items)
175}
176
#[cfg(test)]
mod tests {
    //! Unit and property tests for the individual value parsers and for `parse`.

    use crate::{parse, BencodeError, Value};
    use assert_matches::assert_matches;
    use proptest::{collection::vec, prelude::*};

    #[test]
    fn test_integer() {
        let (_, v) = Value::parse_integer(b"i3e").unwrap();
        assert_matches!(v, Value::Integer(3));

        // Trailing input after the integer is left for the caller.
        let (_, v) = Value::parse_integer(b"i3e1:a").unwrap();
        assert_matches!(v, Value::Integer(3));

        let (_, v) = Value::parse_integer(b"i-3e").unwrap();
        assert_matches!(v, Value::Integer(-3));

        let (_, v) = Value::parse_integer(b"i333333e").unwrap();
        assert_matches!(v, Value::Integer(333_333));

        // Negative zero and leading zeros are hard failures.
        let v = Value::parse_integer(b"i-0e").unwrap_err();
        assert_matches!(v, nom::Err::Failure(BencodeError::InvalidInteger(_)));

        let v = Value::parse_integer(b"i00e").unwrap_err();
        assert_matches!(v, nom::Err::Failure(BencodeError::InvalidInteger(_)));

        let v = Value::parse_integer(b"i-00e").unwrap_err();
        assert_matches!(v, nom::Err::Failure(BencodeError::InvalidInteger(_)));

        let v = Value::parse_integer(b"i03e").unwrap_err();
        assert_matches!(v, nom::Err::Failure(BencodeError::InvalidInteger(_)));

        let v = Value::parse_integer(b"i0040e").unwrap_err();
        assert_matches!(v, nom::Err::Failure(BencodeError::InvalidInteger(_)));

        // Input that is not an integer at all is a recoverable error.
        let v = Value::parse_integer(b"li3ee").unwrap_err();
        assert_matches!(v, nom::Err::Error(BencodeError::Nom(..)));
    }

    #[test]
    fn test_string() {
        let (_, v) = Value::parse_bytes(b"4:abcd").unwrap();
        assert_matches!(v, Value::Bytes(b"abcd"));

        let (_, v) = Value::parse_bytes(b"1:a").unwrap();
        assert_matches!(v, Value::Bytes(b"a"));

        // Only `length` bytes are consumed.
        let (_, v) = Value::parse_bytes(b"1:rock").unwrap();
        assert_matches!(v, Value::Bytes(b"r"));

        let v = Value::parse_bytes(b"0:a").unwrap_err();
        assert_matches!(v, nom::Err::Failure(BencodeError::InvalidBytesLength(_)));
    }

    #[test]
    fn test_list() {
        let (_, v) = Value::parse_list(b"l4:spam4:eggsi22eli1ei2eee").unwrap();
        assert_matches!(v, Value::List(_));

        if let Value::List(list) = v {
            let mut it = list.iter();

            let x = it.next().unwrap();
            assert_matches!(*x, Value::Bytes(b"spam"));

            let x = it.next().unwrap();
            assert_matches!(*x, Value::Bytes(b"eggs"));

            let x = it.next().unwrap();
            assert_matches!(*x, Value::Integer(22));

            let x = it.next().unwrap();
            assert_matches!(*x, Value::List(_));

            if let Value::List(list) = x {
                let mut it = list.iter();

                let x = it.next().unwrap();
                assert_matches!(*x, Value::Integer(1));

                let x = it.next().unwrap();
                assert_matches!(*x, Value::Integer(2));
            }
        }
    }

    #[test]
    fn test_list_empty() {
        let (_, v) = Value::parse_list(b"le").unwrap();
        assert_matches!(v, Value::List(_));
    }

    #[test]
    fn test_dict() {
        let (_, v) = Value::parse_dict(b"d3:cow3:moo4:spam4:eggse").unwrap();
        assert_matches!(v, Value::Dictionary(_));

        if let Value::Dictionary(dict) = v {
            let v = dict.get(b"cow".as_slice()).unwrap();
            assert_matches!(*v, Value::Bytes(b"moo"));

            let v = dict.get(b"spam".as_slice()).unwrap();
            assert_matches!(*v, Value::Bytes(b"eggs"));
        }

        let (_, v) = Value::parse_dict(b"d4:spaml1:a1:bee").unwrap();
        assert_matches!(v, Value::Dictionary(_));

        if let Value::Dictionary(dict) = v {
            let v = dict.get(b"spam".as_slice()).unwrap();
            assert_matches!(*v, Value::List(_));
        }
    }

    #[test]
    fn test_parse() {
        let data = parse(b"d3:cow3:moo4:spam4:eggse").unwrap();
        let v = data.first().unwrap();
        assert_matches!(v, Value::Dictionary(_));

        if let Value::Dictionary(dict) = v {
            let v = dict.get(b"cow".as_slice()).unwrap();
            assert_matches!(*v, Value::Bytes(b"moo"));

            let v = dict.get(b"spam".as_slice()).unwrap();
            assert_matches!(*v, Value::Bytes(b"eggs"));
        }

        // Exercise the public entry point here; `Value::parse_dict` is covered by `test_dict`.
        let data = parse(b"d4:spaml1:a1:bee").unwrap();
        let v = data.first().unwrap();
        assert_matches!(v, Value::Dictionary(_));

        if let Value::Dictionary(dict) = v {
            let v = dict.get(b"spam".as_slice()).unwrap();
            assert_matches!(*v, Value::List(_));
        }
    }

    #[test]
    fn test_parse_invalid_integer() {
        let data = Value::parse_integer(b"123");
        assert!(data.is_err());
    }

    #[test]
    fn test_parse_invalid_bytes() {
        let data = Value::parse_bytes(b"123");
        assert!(data.is_err());
    }

    #[test]
    fn test_parse_invalid_list() {
        let data = Value::parse_list(b"123");
        assert!(data.is_err());

        let data = Value::parse_list(b"l123");
        assert!(data.is_err());

        // Missing the closing `e` of the list.
        let data = Value::parse_list(b"li1e");
        assert!(data.is_err());
    }

    #[test]
    fn test_parse_invalid_dict() {
        let data = Value::parse_dict(b"123");
        assert!(data.is_err());

        let data = Value::parse_dict(b"d123");
        assert!(data.is_err());
    }

    #[test]
    fn test_parse_invalid_x() {
        let data = parse(b"123");
        assert!(data.is_err());
    }

    #[test]
    fn test_parse_torrent() {
        let data = parse(include_bytes!("../test-assets/big-buck-bunny.torrent")).unwrap();
        assert_eq!(data.len(), 1);

        let v = data.first().unwrap();
        assert_matches!(*v, Value::Dictionary(_));

        if let Value::Dictionary(dict) = v {
            let info = dict.get(b"info".as_slice()).unwrap();
            assert_matches!(*info, Value::Dictionary(_));

            let announce = dict.get(b"announce".as_slice()).unwrap();
            assert_matches!(*announce, Value::Bytes(_));

            if let Value::Bytes(announce) = *announce {
                let announce = std::str::from_utf8(announce).unwrap();
                assert_eq!(announce, "udp://tracker.leechers-paradise.org:6969");
            }

            let announce_list = dict.get(b"announce-list".as_slice()).unwrap();
            assert_matches!(*announce_list, Value::List(_));
        }

        let _ = parse(include_bytes!("../test-assets/private.torrent")).unwrap();
        let _ = parse(include_bytes!("../test-assets/multi-file.torrent")).unwrap();
    }

    // Strategy: a complete bencoded byte string (`<len>:<payload>`) with a
    // non-empty payload of arbitrary bytes.
    prop_compose! {
        fn bencode_bytes()(s in vec(any::<u8>(), 1..100)) -> Vec<u8> {
            let mut data: Vec<u8> = Vec::with_capacity(s.len() + 5);
            data.extend(format!("{}:", s.len()).as_bytes());
            data.extend(s);
            data
        }
    }

    // Strategy: a complete bencoded integer (`i<n>e`) for any `i64`.
    prop_compose! {
        fn bencode_integer()(s in any::<i64>()) -> Vec<u8> {
            format!("i{s}e").as_bytes().to_vec()
        }
    }

    // Strategy: a bencoded list mixing integers and byte strings, alternating
    // which of the two comes first.
    prop_compose! {
        fn bencode_list()(s in vec((bencode_integer(), bencode_bytes()), 1..100)) -> Vec<u8> {
            let mut data: Vec<u8> = Vec::with_capacity(s.len() + 2);
            data.extend(b"l");
            for (i, (a, b)) in s.iter().enumerate() {
                if i % 2 == 0 {
                    data.extend(a);
                    data.extend(b);
                } else {
                    data.extend(b);
                    data.extend(a);
                }

            }
            data.extend(b"e");
            data
        }
    }

    // Strategy: a bencoded dictionary whose keys and values are byte strings.
    prop_compose! {
        fn bencode_dict()(s in vec((bencode_bytes(), bencode_bytes()), 1..100)) -> Vec<u8> {
            let mut data: Vec<u8> = Vec::with_capacity(s.len() + 2);
            data.extend(b"d");
            for (i, (a, b)) in s.iter().enumerate() {
                if i % 2 == 0 {
                    data.extend(a);
                    data.extend(b);
                } else {
                    data.extend(b);
                    data.extend(a);
                }
            }
            data.extend(b"e");
            data
        }
    }

    proptest! {
        #[test]
        fn proptest_doesnt_panic_or_overflow(s in any::<Vec<u8>>()) {
            parse(&s).ok();
        }

        #[test]
        fn proptest_parse_integer(s in bencode_integer()) {
            prop_assert!(Value::parse_integer(&s).is_ok());
        }

        #[test]
        fn proptest_parse_bytes(s in bencode_bytes()) {
            // `bencode_bytes` already yields a length-prefixed string, so it is
            // parsed as-is and must be consumed completely.
            prop_assert!(matches!(
                Value::parse_bytes(&s),
                Ok((rest, Value::Bytes(_))) if rest.is_empty()
            ));
        }

        #[test]
        fn proptest_parse_list(s in bencode_list()) {
            prop_assert!(Value::parse_list(&s).is_ok());
        }

        #[test]
        fn proptest_parse_dict(s in bencode_dict()) {
            prop_assert!(Value::parse_dict(&s).is_ok());
        }
    }
}