bside/
parser.rs

1use crate::error::Error;
2use crate::value::Value;
3use nom::IResult;
4use nom::Parser;
5use nom::branch::alt;
6use nom::character::{self, one_of};
7use nom::combinator::recognize;
8use nom::multi::{length_data, many0};
9use nom::sequence::{delimited, pair, terminated};
10use std::collections::HashMap;
11
12/// Parses a bencoded value into a [`Value`]. Dictionaries are required to
13/// be sorted, in keeping with the specification.
14pub fn parse(input: &[u8]) -> Result<Value, Error> {
15    parse_with_strictness(input, true)
16}
17
18/// Parses a bencoded value into a [`Value`]. Dictionaries are not required to
19/// be sorted.
20pub fn parse_lenient(input: &[u8]) -> Result<Value, Error> {
21    parse_with_strictness(input, false)
22}
23
24// Parses a bencoded value into a `Value`. If `strict` is `true`, dictionaries are required to be
25// sorted.
26fn parse_with_strictness(input: &[u8], strict: bool) -> Result<Value, Error> {
27    if input.is_empty() {
28        Err(Error::Message(String::from("Input is empty.")))
29    } else {
30        match parse_value(input, strict) {
31            Ok((remaining, value)) => {
32                if remaining.is_empty() {
33                    Ok(value)
34                } else {
35                    Err(Error::Message(String::from("Non-singular root item.")))
36                }
37            }
38
39            _ => Err(Error::Message(String::from("Invalid input."))),
40        }
41    }
42}
43
44fn parse_value(input: &[u8], strict: bool) -> IResult<&[u8], Value> {
45    alt((
46        parse_int,
47        parse_bytes,
48        |input| parse_list(input, strict),
49        |input| parse_dict(input, strict),
50    ))
51    .parse(input)
52}
53
54fn parse_int(input: &[u8]) -> IResult<&[u8], Value> {
55    fn recognize_positive_int(input: &[u8]) -> IResult<&[u8], &[u8]> {
56        recognize((one_of("123456789"), many0(one_of("0123456789")))).parse(input)
57    }
58
59    fn recognize_negative_int(input: &[u8]) -> IResult<&[u8], &[u8]> {
60        recognize((
61            character::complete::char('-'),
62            one_of("123456789"),
63            many0(one_of("0123456789")),
64        ))
65        .parse(input)
66    }
67
68    fn recognize_zero(input: &[u8]) -> IResult<&[u8], &[u8]> {
69        recognize(character::complete::char('0')).parse(input)
70    }
71
72    fn recognize_int_chars(input: &[u8]) -> IResult<&[u8], &[u8]> {
73        alt((
74            recognize_positive_int,
75            recognize_negative_int,
76            recognize_zero,
77        ))
78        .parse(input)
79    }
80
81    let (remaining, int_chars) = delimited(
82        character::complete::char('i'),
83        recognize_int_chars,
84        character::complete::char('e'),
85    )
86    .parse(input)?;
87
88    let (_, int) = character::complete::i64.parse(int_chars)?;
89    Ok((remaining, Value::Int(int)))
90}
91
92fn parse_byte_slice(input: &[u8]) -> IResult<&[u8], &[u8]> {
93    fn parse_length(input: &[u8]) -> IResult<&[u8], u64> {
94        terminated(character::complete::u64, character::complete::char(':')).parse(input)
95    }
96
97    length_data(parse_length).parse(input)
98}
99
100fn parse_bytes(input: &[u8]) -> IResult<&[u8], Value> {
101    let (remaining, parsed) = parse_byte_slice(input)?;
102    Ok((remaining, Value::Bytes(parsed.into())))
103}
104
105fn parse_list(input: &[u8], strict: bool) -> IResult<&[u8], Value> {
106    let (remaining, parsed) = delimited(
107        character::complete::char('l'),
108        many0(|input| parse_value(input, strict)),
109        character::complete::char('e'),
110    )
111    .parse(input)?;
112
113    Ok((remaining, Value::List(parsed)))
114}
115
116fn parse_dict(input: &[u8], strict: bool) -> IResult<&[u8], Value> {
117    let (remaining, pairs) = delimited(
118        character::complete::char('d'),
119        many0(pair(parse_byte_slice, |input| parse_value(input, strict))),
120        character::complete::char('e'),
121    )
122    .parse(input)?;
123
124    if !strict || pairs.is_sorted_by_key(|(key, _)| key) {
125        Ok((
126            remaining,
127            Value::Dict(HashMap::from_iter(
128                pairs.into_iter().map(|(key, value)| (key.into(), value)),
129            )),
130        ))
131    } else {
132        Err(nom::Err::Failure(nom::error::Error {
133            input,
134            code: nom::error::ErrorKind::Fail,
135        }))
136    }
137}
138
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Inputs that must parse ----

    /// A plain positive integer.
    #[test]
    fn positive_integer() {
        let parsed = parse(b"i1203e").unwrap();
        assert_eq!(parsed, Value::Int(1203));
    }

    /// A negative integer.
    #[test]
    fn negative_integer() {
        let parsed = parse(b"i-1203e").unwrap();
        assert_eq!(parsed, Value::Int(-1203));
    }

    /// Zero is written as a single `0`.
    #[test]
    fn zero() {
        let parsed = parse(b"i0e").unwrap();
        assert_eq!(parsed, Value::Int(0));
    }

    /// A length-prefixed byte string.
    #[test]
    fn byte_string() {
        let expected = Value::Bytes(b"bencode".into());
        assert_eq!(parse(b"7:bencode").unwrap(), expected);
    }

    /// A list mixing a byte string and an integer.
    #[test]
    fn list() {
        let expected = Value::List(vec![Value::Bytes(b"bencode".into()), Value::Int(-20)]);
        assert_eq!(parse(b"l7:bencodei-20ee").unwrap(), expected);
    }

    /// A dictionary with sorted keys is accepted by the strict parser.
    #[test]
    fn dict() {
        let expected = Value::Dict(HashMap::from([
            (b"meaning".into(), Value::Int(42)),
            (b"wiki".into(), Value::Bytes(b"bencode".into())),
        ]));
        assert_eq!(parse(b"d7:meaningi42e4:wiki7:bencodee").unwrap(), expected);
    }

    /// A dictionary with unsorted keys is accepted by the lenient parser.
    #[test]
    fn dict_lenient() {
        let expected = Value::Dict(HashMap::from([
            (b"meaning".into(), Value::Int(42)),
            (b"wiki".into(), Value::Bytes(b"bencode".into())),
        ]));
        assert_eq!(
            parse_lenient(b"d4:wiki7:bencode7:meaningi42ee").unwrap(),
            expected
        );
    }

    // ---- Inputs that must be rejected ----

    /// Asserts that `result` is an `Error::Message` failure.
    fn expect_parse_error(result: Result<Value, Error>) {
        let is_message_error = matches!(result, Err(Error::Message(_)));
        assert!(is_message_error);
    }

    #[test]
    fn empty_input() {
        expect_parse_error(parse(b""));
    }

    /// Two complete values back to back are not a valid document.
    #[test]
    fn non_singular_root_item() {
        expect_parse_error(parse(b"i123ei456e"));
    }

    /// `a` is not a known type marker.
    #[test]
    fn invalid_type() {
        expect_parse_error(parse(b"a1e"));
    }

    #[test]
    fn missing_colon() {
        expect_parse_error(parse(b"1a"));
    }

    #[test]
    fn missing_int_terminator() {
        expect_parse_error(parse(b"i123"));
    }

    /// The declared length is longer than the bytes that follow.
    #[test]
    fn eof_before_completing_byte_string() {
        expect_parse_error(parse(b"7:abc"));
    }

    #[test]
    fn missing_list_terminator() {
        expect_parse_error(parse(b"l7:bencodei-20e"));
    }

    #[test]
    fn missing_dict_terminator() {
        expect_parse_error(parse(b"d7:meaningi42e4:wiki7:bencode"));
    }

    #[test]
    fn integer_contains_non_digit() {
        expect_parse_error(parse(b"i1_2e"));
    }

    #[test]
    fn negative_zero() {
        expect_parse_error(parse(b"i-0e"));
    }

    #[test]
    fn positive_int_with_leading_zero() {
        expect_parse_error(parse(b"i01e"));
    }

    #[test]
    fn negative_int_with_leading_zero() {
        expect_parse_error(parse(b"i-01e"));
    }

    /// Dictionary keys must be byte strings, not integers.
    #[test]
    fn dict_key_not_byte_string() {
        expect_parse_error(parse(b"di3ei4ee"));
    }

    /// A key with no value before the closing `e`.
    #[test]
    fn dict_missing_value() {
        expect_parse_error(parse(b"d7:meaninge"));
    }

    /// The strict parser rejects the same input `dict_lenient` accepts.
    #[test]
    fn dict_not_sorted_strict() {
        expect_parse_error(parse(b"d4:wiki7:bencode7:meaningi42ee"));
    }
}