sveppa_bencode/
parser.rs

1use std::collections::HashMap;
2
3use crate::bencode::{
4    Bencode,
5    Bencode::*,
6};
7use crate::error::{
8    BencodeParserError,
9    BencodeParserError::*,
10};
11
12pub(crate) fn parse_child(bytes: &[u8]) -> Result<(Bencode, usize), BencodeParserError> {
13    match bytes[0] {
14        48..=57 => bytestr_from_bytes(&bytes),
15        100 => dict_from_bytes(&bytes),
16        105 => integer_from_bytes(&bytes),
17        108 => list_from_bytes(&bytes),
18        _ => Err(NotAValidBencodeByte),
19    }
20}
21
22/// parse a byte array as a Bencode::Integer.
23pub(crate) fn integer_from_bytes(bytes: &[u8]) -> Result<(Bencode, usize), BencodeParserError> {
24    if bytes[0] != 'i' as u8 {
25        return Err(BadFirstByte);
26    }
27
28    let si: usize = match ascii_integer(bytes, 'e' as u8, 1) {
29        Ok(si) => si,
30        Err(err) => return Err(err),
31    };
32
33    // smallest legal value possible is 2
34    if si == 0 {
35        // 0 indicates we hit no sentinel.
36        return Err(MissingSentinel);
37    } else if si == 1 {
38        // 1 indicates the byte array was: ['i', 'e'] (no value)
39        return Err(InvalidIntegerString);
40    }
41
42    // +1 for 'e', +2 for char after 'e'
43    Ok((Value(bytes[1..si].to_vec()), bytes[1..si].len() + 2))
44}
45
46/// parse a byte array as a Bencode::ByteStr
47pub(crate) fn bytestr_from_bytes(bytes: &[u8]) -> Result<(Bencode, usize), BencodeParserError> {
48    let si: usize = match ascii_integer(bytes, ':' as u8, 0) {
49        Ok(si) => si,
50        Err(err) => return Err(err),
51    };
52
53    let len_str: String = String::from_utf8(bytes[0..si].to_vec()).unwrap();
54    let len_val: usize = usize::from_str_radix(&len_str, 10).unwrap();
55
56    if len_val + si >= bytes.len() {
57        return Err(ByteStringLengthOverflow);
58    }
59
60    Ok((
61        Value(bytes[si + 1..=si + len_val].to_vec()),
62        len_val + len_str.len() + 1,
63    ))
64}
65
66/// parse a byte array as a Bencode::List
67pub(crate) fn list_from_bytes(bytes: &[u8]) -> Result<(Bencode, usize), BencodeParserError> {
68    let mut children: Vec<Bencode> = Vec::new();
69    let mut i: usize = 1;
70    while i < bytes.len() && bytes[i] != 'e' as u8 {
71        let (child, offset) = match parse_child(&bytes[i..]) {
72            Ok(val) => val,
73            Err(benc_err) => return Err(benc_err),
74        };
75        i += offset;
76        children.push(child);
77    }
78    if i >= bytes.len() {
79        return Err(MissingSentinel);
80    }
81    Ok((List(children), i + 1))
82}
83
84/// parse a byte array as a Bencode::Dict.
85pub(crate) fn dict_from_bytes(bytes: &[u8]) -> Result<(Bencode, usize), BencodeParserError> {
86    let mut children: HashMap<Vec<u8>, Bencode> = HashMap::new();
87    let mut i: usize = 1;
88
89    while i < bytes.len() && bytes[i] != 'e' as u8 {
90        // this check is needed here, because this is the only place where the ByteStr type
91        // is required, all other places switch on the first byte.
92        if bytes[i] < 48 || bytes[i] > 57 {
93            return Err(BadDictionaryKey);
94        }
95        let (key, offset): (Bencode, usize) = match bytestr_from_bytes(&bytes[i..]) {
96            Ok(val) => val,
97            Err(benc_err) => return Err(benc_err),
98        };
99        i += offset;
100        let (val, offset): (Bencode, usize) = match parse_child(&bytes[i..]) {
101            Ok(val) => val,
102            Err(benc_err) => return Err(benc_err),
103        };
104        i += offset;
105        children.insert(key.as_bytes().unwrap().to_owned(), val);
106    }
107    if i >= bytes.len() {
108        return Err(MissingSentinel);
109    }
110    Ok((Dict(children), i + 1))
111}
112
113
114/// iterates over a byte array looking for seek_byte. along the way it verifies that that every
115/// byte is a valid ASCII [0-9]. If a non ASCII byte is found a
116/// BencodeError::InvalidIntegerString is thrown.
117///
118/// bytes           the bytes to iterator over.
119/// seek_byte       the byte to return the index of.
120/// offset          where to start in the byte array.
121///
122/// returns Ok(index) where `index` is the index of the `seek_byte` in the `bytes` array,
123/// or Err(benc_err) where benc_err is the BencodeError that occurred while looking for the
124/// `seek_byte`.
125fn ascii_integer(bytes: &[u8], seek_byte: u8, offset: usize) -> Result<usize, BencodeParserError> {
126    for (i, b) in bytes[offset..].iter().enumerate() {
127        match b {
128            48..=57 => continue, // [0-9] keep parsing
129            x if x == &seek_byte => return Ok(i + offset),
130            _ => return Err(InvalidASCIIBytes),
131        };
132    }
133    return Err(MissingSentinel);
134}