juicy_bencode/
lib.rs

1#![allow(unused)]
2//! ![](https://img.shields.io/docsrs/juicy_bencode)
3//! ![](https://img.shields.io/crates/v/juicy_bencode)
4//! ![](https://img.shields.io/crates/l/juicy_bencode)
5//!
6//! A little parser for [bencode](https://www.bittorrent.org/beps/bep_0003.html#bencoding) using the
7//! Nom library. **Nom eats input byte by byte, and bencode is such juicy input!**
8//!
9//! Bencode allows for 4 kinds of values:
10//! 1. integers
11//! 2. byte strings
12//! 3. lists
13//! 4. dictionaries
14//!
15//! Unlike JSON, bencode doesn't say how to encode stuff in general, but in practice, you should
16//! just//! parse a bencode blob as a dictionary (just like JSON). Although, the individual parsing
17//! functions are provided.
18//!
19//! For more information about bencode, you're encourage to read the specification. It's less than
20//! 200 words long!
21
22use nom::{
23    branch::alt,
24    bytes::complete::{tag, take, take_while, take_while1},
25    character::complete::{char, digit0, i64, u64},
26    multi::many1,
27    combinator::{recognize, complete, map},
28    sequence::{terminated, tuple, delimited, pair, preceded},
29    Err, IResult, ParseTo,
30};
31use std::collections::BTreeMap;
32
33extern crate derive_more;
34
35fn is_non_zero_num(c: u8) -> bool {
36    [b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9'].contains(&c)
37}
38
39
40/// Parse out a bencode integer, conforming to bencode specification, leading 0s and negative 0 are
41/// rejected. Since the bencode specification places no limit on the range of the integers, the
42/// function will only give out string slices and leave the conversion choice to the user.`
43///
44/// # Note
45/// Although the functions are exposed directly, it's unsuitable to be used directly in most cases,
46/// it's provided for quick and dirty convenience only.
47pub fn parse_bencode_num(input: &[u8]) -> IResult<&[u8], &[u8]> {
48    let zero = complete(tag("0"));
49    let minus_sign = tag("-");
50
51    // positive case, any digits without leading zeros
52    // the function can't be clone for some reason, welp
53    let positive1 = recognize(pair(take_while1(is_non_zero_num), digit0));
54    let positive2 = recognize(pair(take_while1(is_non_zero_num), digit0));
55
56    // negative case
57    let negative = recognize(pair(minus_sign, positive1));
58
59    delimited(tag("i"), alt((positive2, negative, zero)), tag("e"))(input)
60}
61
62/// Parse out a bencode string, note that bencode strings are not equivalent to Rust strings since
63/// bencode places no limit what encoding it uses, hence it's more appreciate to call them byte
64/// strings
65///
66/// # Note
67/// Although the functions are exposed directly, it's unsuitable to be used directly in most cases,
68/// it's provided for quick and dirty convenience only.
69pub fn parse_bencode_string(input: &[u8]) -> IResult<&[u8], &[u8]> {
70    let (str, length) = u64(input)?;
71
72    preceded(tag(":"), take(length))(str)
73}
74
75/// Parse out bencode list, technically, bencode places not restriction on if the list items are
76/// homogeneous, meaning a list could contain both integers and strings.
77///
78/// # Note
79/// Although the functions are exposed directly, it's unsuitable to be used directly in most cases,
80/// it's provided for quick and dirty convenience only.
81pub fn parse_bencode_list(input: &[u8]) -> IResult<&[u8], Vec<BencodeItemView>> {
82    let list_elems = many1(bencode_value);
83
84    delimited(tag("l"), list_elems, tag("e"))(input)
85}
86
87
88/// Main entry for the parser (for all practical purposes, a blob of bencode is consist of key value
89/// pairs). It parses out a bencode dictionary, bencode places no restriction on the homogeneity of
90/// dictionary pairs.
91pub fn parse_bencode_dict(input: &[u8]) -> IResult<&[u8], BTreeMap<&[u8], BencodeItemView>> {
92    let key_value = many1(pair(parse_bencode_string, bencode_value));
93
94    let (remaining, key_value_pairs) = delimited(tag("d"), key_value, tag("e"))(input)?;
95
96    let dict = key_value_pairs
97        .into_iter()
98        .fold(BTreeMap::new(), |mut acc, x| {
99            acc.insert(x.0, x.1);
100            acc
101        });
102
103    Ok((remaining, dict))
104
105    // TODO: bencode requires the keys of the dictionary to be in lexicographical order, maybe this
106    // isn't the best place to handle this
107    //
108    // // somehow the Vec::is_sorted requires nightly as of 1.61, this is so ghetto
109    //
110    // let mut sorted = key_value_pairs.clone();
111    // sorted.sort_unstable_by_key(|elem| elem.0);
112    // let sorted_keys = sorted.iter().map(|x| x.0);
113    //
114    //
115    // if !key_value_pairs
116    //     .iter()
117    //     .map(|x| x.0)
118    //     .zip(sorted_keys)
119    //     .all(|pair| pair.0 == pair.1) {
120    //     return Err::Failure(BencodeSchemaError::new("Nothing is actually broken about your dict, but the bencode specification states all keys must appear in lexicographical order".into_string(), BencodeSchemaErrorKinds::DictNotInLexicographicalOrder));
121    // }
122}
123
124/// Top level combinator for choosing an appreciate strategy for parsing out a bencode item
125fn bencode_value(input: &[u8]) -> IResult<&[u8], BencodeItemView> {
126    let to_int = map(parse_bencode_num, |int_pattern| {
127        BencodeItemView::Integer(int_pattern.parse_to().unwrap())
128    });
129    let to_byte_str = map(parse_bencode_string, |byte_slice| {
130        BencodeItemView::ByteString(byte_slice)
131    });
132    let to_list = map(parse_bencode_list, BencodeItemView::List);
133    let to_dict = map(parse_bencode_dict,  BencodeItemView::Dictionary);
134
135    alt((to_int, to_byte_str, to_list, to_dict))(input)
136}
137
138/// Representation of bencode blobs as a tree. The lifetime is tied to the text in memory, achieving
139/// *almost zero copy*. This is perhaps unsuitable for large bencode blobs since the entire blob may
140/// not fit inside the memory.
141///
142/// An owned version is on the agenda but I can't be bothered right now.
143#[derive(Debug, Ord, Clone, PartialOrd, Eq, PartialEq, Hash)]
144pub enum BencodeItemView<'a> {
145    // TODO: technically the specification doesn't say any limits on the integer size, need to switch
146    // to an infinite size one
147    /// Bencode integers are represented as i64 for now, technically this is not to specification
148    /// since no range limit is specified in the bencode document.
149    Integer(i64),
150
151    /// Bencode strings are not guaranteed to be UTF-8, thus using a byte slice
152    ByteString(&'a [u8]),
153
154    /// Bencode lists, note lists may not be homogeneous
155    List(Vec<BencodeItemView<'a>>),
156
157    /// Bencode dictionary, note lists may not be homogeneous. Bencode dictionary by specification
158    /// must be lexicographically sorted, BTree preserves ordering
159    Dictionary(BTreeMap<&'a [u8], BencodeItemView<'a>>),
160}
161
162#[cfg(test)]
163mod tests {
164    use super::*;
165
166    #[test]
167    fn zero_is_valid_be_num() {
168        let (_, parsed) = parse_bencode_num(b"i0e").unwrap();
169        assert_eq!(parsed, b"0");
170    }
171
172    #[test]
173    fn positive_number_without_leading_zero_is_valid() {
174        let (_, parsed) = parse_bencode_num(b"i124223e").unwrap();
175        assert_eq!(parsed, b"124223");
176    }
177
178    #[test]
179    fn positive_number_with_leading_zero_is_rejected() {
180        let res = parse_bencode_num(b"i0001e");
181        assert!(res.is_err());
182    }
183
184    #[test]
185    fn negative_number_without_leading_zero_is_valid() {
186        let (_, parsed) = parse_bencode_num(b"i-121241e").unwrap();
187        assert_eq!(parsed, b"-121241")
188    }
189
190    #[test]
191    fn negative_zero_is_not_allowed() {
192        let parsed = parse_bencode_num(b"i-0e");
193        assert!(parsed.is_err())
194    }
195
196    #[test]
197    fn positive_number_with_zeroes_in_between_is_valid() {
198        let (_, parsed) = parse_bencode_num(b"i700454e").unwrap();
199        assert_eq!(parsed, b"700454");
200    }
201
202    #[test]
203    fn negative_number_with_zeroes_in_between_is_valid() {
204        let (_, parsed) = parse_bencode_num(b"i-6004e").unwrap();
205        assert_eq!(parsed, b"-6004")
206    }
207
208    #[test]
209    fn naked_numbers_are_not_bencode_numbers() {
210        let parsed = parse_bencode_num(b"8232");
211        assert!(parsed.is_err())
212    }
213
214    #[test]
215    fn negative_number_with_leading_zeroes_is_not_allowed() {
216        let parsed = parse_bencode_num(b"i-0001213e");
217        assert!(parsed.is_err())
218    }
219
220    #[test]
221    fn letters_are_not_be_numbers() {
222        let parsed = parse_bencode_num(b"iabcedfge");
223        assert!(parsed.is_err());
224    }
225
226    #[test]
227    fn naked_string_is_not_bencode_string() {
228        let parsed = parse_bencode_string(b"string!");
229        assert!(parsed.is_err());
230    }
231
232    #[test]
233    fn bencode_string_takes_exact_length() {
234        let (_, parsed) = parse_bencode_string(b"4:spam").unwrap();
235        assert_eq!(parsed, b"spam");
236    }
237
238    #[test]
239    fn strings_shorter_than_declaration_is_not_allowed() {
240        let parsed = parse_bencode_string(b"4:spa");
241        assert!(parsed.is_err());
242    }
243
244    #[test]
245    fn bencode_list_eats_all_inputs() {
246        let (remaining, parsed) = parse_bencode_list(b"l4:spami42ee").unwrap();
247
248        let expected = vec![
249            BencodeItemView::ByteString(b"spam"),
250            BencodeItemView::Integer(42),
251        ];
252        assert_eq!(expected, parsed);
253        assert_eq!(remaining, b"");
254    }
255
256    #[test]
257    fn bencode_dict_eats_all_inputs() {
258        let (remaining, parsed) = parse_bencode_dict(b"d3:bar4:spam3:fooi42ee").unwrap();
259
260        let mut expected = BTreeMap::new();
261        expected.insert(b"bar".as_slice(), BencodeItemView::ByteString(b"spam"));
262        expected.insert(b"foo".as_slice(), BencodeItemView::Integer(42));
263
264        assert_eq!(expected, parsed);
265        assert_eq!(remaining, b"");
266    }
267}