bside/
parser.rs

1use nom::IResult;
2use nom::Parser;
3use nom::branch::alt;
4use nom::character::{self, one_of};
5use nom::combinator::recognize;
6use nom::multi::{length_data, many0};
7use nom::sequence::{delimited, pair, terminated};
8use std::collections::HashMap;
9
10use crate::error::Error;
11use crate::value::Value;
12
13/// Parses a bencoded value into a [`Value`]. If `strict` is `true`, dictionaries are required to
14/// be sorted (in keeping with the specification).
15pub fn parse(input: &[u8], strict: bool) -> Result<Value, Error> {
16    if input.is_empty() {
17        Err(Error::Message(String::from("Input is empty.")))
18    } else {
19        match parse_value(input, strict) {
20            Ok((remaining, value)) => {
21                if remaining.is_empty() {
22                    Ok(value)
23                } else {
24                    Err(Error::Message(String::from("Non-singular root item.")))
25                }
26            }
27
28            _ => Err(Error::Message(String::from("Invalid input."))),
29        }
30    }
31}
32
33fn parse_int(input: &[u8]) -> IResult<&[u8], Value> {
34    fn recognize_positive_int(input: &[u8]) -> IResult<&[u8], &[u8]> {
35        recognize((one_of("123456789"), many0(one_of("0123456789")))).parse(input)
36    }
37
38    fn recognize_negative_int(input: &[u8]) -> IResult<&[u8], &[u8]> {
39        recognize((
40            character::complete::char('-'),
41            one_of("123456789"),
42            many0(one_of("0123456789")),
43        ))
44        .parse(input)
45    }
46
47    fn recognize_zero(input: &[u8]) -> IResult<&[u8], &[u8]> {
48        recognize(character::complete::char('0')).parse(input)
49    }
50
51    fn recognize_int_chars(input: &[u8]) -> IResult<&[u8], &[u8]> {
52        alt((
53            recognize_positive_int,
54            recognize_negative_int,
55            recognize_zero,
56        ))
57        .parse(input)
58    }
59
60    let (remaining, int_chars) = delimited(
61        character::complete::char('i'),
62        recognize_int_chars,
63        character::complete::char('e'),
64    )
65    .parse(input)?;
66
67    let (_, int) = character::complete::i64.parse(int_chars)?;
68    Ok((remaining, Value::Int(int)))
69}
70
71fn parse_byte_slice(input: &[u8]) -> IResult<&[u8], &[u8]> {
72    fn parse_length(input: &[u8]) -> IResult<&[u8], u64> {
73        terminated(character::complete::u64, character::complete::char(':')).parse(input)
74    }
75
76    length_data(parse_length).parse(input)
77}
78
79fn parse_bytes(input: &[u8]) -> IResult<&[u8], Value> {
80    let (remaining, parsed) = parse_byte_slice(input)?;
81    Ok((remaining, Value::Bytes(parsed.into())))
82}
83
84fn parse_list(input: &[u8], strict: bool) -> IResult<&[u8], Value> {
85    let (remaining, parsed) = delimited(
86        character::complete::char('l'),
87        many0(|input| parse_value(input, strict)),
88        character::complete::char('e'),
89    )
90    .parse(input)?;
91
92    Ok((remaining, Value::List(parsed)))
93}
94
95fn parse_dict(input: &[u8], strict: bool) -> IResult<&[u8], Value> {
96    let (remaining, pairs) = delimited(
97        character::complete::char('d'),
98        many0(pair(parse_byte_slice, |input| parse_value(input, strict))),
99        character::complete::char('e'),
100    )
101    .parse(input)?;
102
103    if !strict || pairs.is_sorted_by_key(|(key, _)| key) {
104        Ok((
105            remaining,
106            Value::Dict(HashMap::from_iter(
107                pairs.into_iter().map(|(key, value)| (key.into(), value)),
108            )),
109        ))
110    } else {
111        Err(nom::Err::Failure(nom::error::Error {
112            input,
113            code: nom::error::ErrorKind::Fail,
114        }))
115    }
116}
117
118fn parse_value(input: &[u8], strict: bool) -> IResult<&[u8], Value> {
119    alt((
120        parse_int,
121        parse_bytes,
122        |input| parse_list(input, strict),
123        |input| parse_dict(input, strict),
124    ))
125    .parse(input)
126}
127
128#[cfg(test)]
129mod tests {
130    use super::*;
131
132    #[test]
133    fn positive_integer() {
134        assert_eq!(parse(b"i1203e", true).unwrap(), Value::Int(1203));
135    }
136
137    #[test]
138    fn negative_integer() {
139        assert_eq!(parse(b"i-1203e", true).unwrap(), Value::Int(-1203));
140    }
141
142    #[test]
143    fn zero() {
144        assert_eq!(parse(b"i0e", true).unwrap(), Value::Int(0));
145    }
146
147    #[test]
148    fn byte_string() {
149        assert_eq!(
150            parse(b"7:bencode", true).unwrap(),
151            Value::Bytes(b"bencode".into())
152        );
153    }
154
155    #[test]
156    fn list() {
157        assert_eq!(
158            parse(b"l7:bencodei-20ee", true).unwrap(),
159            Value::List(vec![Value::Bytes(b"bencode".into()), Value::Int(-20)])
160        );
161    }
162
163    #[test]
164    fn dict() {
165        assert_eq!(
166            parse(b"d7:meaningi42e4:wiki7:bencodee", true).unwrap(),
167
168            Value::Dict(HashMap::from([
169                (b"wiki".into(), Value::Bytes(b"bencode".into())),
170                (b"meaning".into(), Value::Int(42))
171            ]))
172        );
173    }
174
175    #[test]
176    fn dict_not_sorted_not_strict() {
177        assert_eq!(
178            parse(b"d4:wiki7:bencode7:meaningi42ee", false).unwrap(),
179
180            Value::Dict(HashMap::from([
181                (b"wiki".into(), Value::Bytes(b"bencode".into())),
182                (b"meaning".into(), Value::Int(42))
183            ]))
184        );
185    }
186
187    fn expect_parse_error(result: Result<Value, Error>) {
188        assert!(matches!(result, Err(Error::Message(_))))
189    }
190
191    #[test]
192    fn empty_input() {
193        expect_parse_error(parse(b"", true));
194    }
195
196    #[test]
197    fn non_singular_root_item() {
198        expect_parse_error(parse(b"i123ei456e", true));
199    }
200
201    #[test]
202    fn invalid_type() {
203        expect_parse_error(parse(b"a1e", true));
204    }
205
206    #[test]
207    fn missing_colon() {
208        expect_parse_error(parse(b"1a", true));
209    }
210
211    #[test]
212    fn missing_int_terminator() {
213        expect_parse_error(parse(b"i123", true));
214    }
215
216    #[test]
217    fn eof_before_completing_byte_string() {
218        expect_parse_error(parse(b"7:abc", true));
219    }
220
221    #[test]
222    fn missing_list_terminator() {
223        expect_parse_error(parse(b"l7:bencodei-20e", true));
224    }
225
226    #[test]
227    fn missing_dict_terminator() {
228        expect_parse_error(parse(b"d7:meaningi42e4:wiki7:bencode", true));
229    }
230
231    #[test]
232    fn integer_contains_non_digit() {
233        expect_parse_error(parse(b"i1_2e", true));
234    }
235
236    #[test]
237    fn negative_zero() {
238        expect_parse_error(parse(b"i-0e", true));
239    }
240
241    #[test]
242    fn positive_int_with_leading_zero() {
243        expect_parse_error(parse(b"i01e", true));
244    }
245
246    #[test]
247    fn negative_int_with_leading_zero() {
248        expect_parse_error(parse(b"i-01e", true));
249    }
250
251    #[test]
252    fn dict_key_not_byte_string() {
253        expect_parse_error(parse(b"di3ei4ee", true))
254    }
255
256    #[test]
257    fn dict_missing_value() {
258        expect_parse_error(parse(b"d7:meaninge", true))
259    }
260
261    #[test]
262    fn dict_not_sorted_strict() {
263        expect_parse_error(parse(b"d4:wiki7:bencode7:meaningi42ee", true));
264    }
265}