1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
use crate::error::{Error, Error::*, *};
use crate::types::{Bencode, Bencode::*};
use std::collections::HashMap;

/// Parses the Bencode value that starts at the beginning of `bytes`, dispatching on the first
/// byte:
///
/// * an ASCII digit (`0`-`9`) is handed to `as_bstr` (byte string),
/// * `d` is handed to `as_dict` (dictionary),
/// * `i` is handed to `as_int` (integer),
/// * `l` is handed to `as_list` (list).
///
/// The result of the selected parser is returned unchanged.
///
/// # Errors
///
/// Returns `NotAValidBencodeByte` when `bytes` is empty or when its first byte does not
/// introduce any of the types above. Errors from the selected parser are propagated.
pub(super) fn parse_child(bytes: &[u8]) -> Result {
    // `first()` rather than `bytes[0]`: an empty slice (for example input that was cut off right
    // before a value) must surface as an error instead of an index-out-of-bounds panic.
    match bytes.first() {
        Some(b'0'..=b'9') => as_bstr(bytes),
        Some(b'd') => as_dict(bytes),
        Some(b'i') => as_int(bytes),
        Some(b'l') => as_list(bytes),
        _ => Err(NotAValidBencodeByte),
    }
}

/// Parses `bytes` as a `Bencode::Integer` of the form `i<digits>e`.
///
/// On success returns the parsed integer together with the number of bytes consumed, counting
/// both the leading `i` and the terminating `e`.
///
/// Only unsigned decimal digits are accepted between `i` and `e`; any other byte (including a
/// leading `-`) is rejected by `ascii_integer`.
///
/// # Errors
///
/// * `BadFirstByte` - `bytes` is empty or does not start with `i`.
/// * `InvalidIntegerString` - there are no digits between `i` and `e`, or the digits describe a
///   value that does not fit in a `usize`.
/// * Any error reported by `ascii_integer` while scanning for the terminating `e`.
fn as_int(bytes: &[u8]) -> Result {
    // `first()` instead of `bytes[0]` so that an empty slice is an error, not a panic.
    if bytes.first() != Some(&b'i') {
        return Err(BadFirstByte);
    }

    // Index of the terminating 'e'. The scan starts at offset 1 (after the 'i'), so the returned
    // index is always >= 1.
    let si = ascii_integer(bytes, b'e', 1)?;

    // An index of 1 means the input was exactly "ie...": no digits at all.
    if si == 1 {
        return Err(InvalidIntegerString);
    }

    // Every byte in `1..si` is an ASCII digit, but the value can still be too large for `usize`
    // (e.g. "i99999999999999999999999e"), so parsing must be fallible rather than unwrapped.
    let int_val = std::str::from_utf8(&bytes[1..si])
        .ok()
        .and_then(|digits| digits.parse::<usize>().ok())
        .ok_or(InvalidIntegerString)?;

    // `si` is the index of 'e'; `si + 1` is the first byte after this integer.
    Ok((Integer(int_val), si + 1))
}

/// parse a byte array as a Bencode::ByteStr
fn as_bstr(bytes: &[u8]) -> Result {
    let si: usize = match ascii_integer(bytes, ':' as u8, 0) {
        Ok(si) => si,
        Err(err) => return Err(err),
    };

    let len_str: String = String::from_utf8(bytes[0..si].to_vec()).unwrap();
    let len_val: usize = usize::from_str_radix(&len_str, 10).unwrap();

    if len_val + si >= bytes.len() {
        return Err(ByteStringLengthOverflow);
    }

    Ok((
        ByteStr(bytes[si + 1..=si + len_val].to_vec()),
        len_val + len_str.len() + 1,
    ))
}

/// Parses `bytes` as a `Bencode::List`.
///
/// The byte at index 0 is skipped without being inspected; elements are then parsed one after
/// another with `parse_child` until an `e` is found where the next element would start.
///
/// On success returns the list together with the number of bytes consumed, including the
/// terminating `e`. Returns `MissingSentinel` when the input ends before that `e`, and
/// propagates any error produced while parsing an element.
fn as_list(bytes: &[u8]) -> Result {
    let mut items = Vec::new();
    // Position of the next unread byte; starts just past the list marker.
    let mut cursor = 1usize;

    loop {
        match bytes.get(cursor) {
            // Ran off the end without ever seeing the closing 'e'.
            None => return Err(MissingSentinel),
            // Closing 'e': the list is complete, and the 'e' itself counts as consumed.
            Some(&b'e') => return Ok((List(items), cursor + 1)),
            // Anything else begins another element.
            Some(_) => {
                let (item, consumed) = parse_child(&bytes[cursor..])?;
                cursor += consumed;
                items.push(item);
            }
        }
    }
}

/// parse a byte array as a Bencode::Dict.
fn as_dict(bytes: &[u8]) -> Result {
    let mut children: HashMap<Vec<u8>, Bencode> = HashMap::new();
    let mut i: usize = 1;

    while i < bytes.len() && bytes[i] != 'e' as u8 {
        // this check is needed here, because this is the only place where the ByteStr type
        // is required, all other places switch on the first byte.
        if bytes[i] < 48 || bytes[i] > 57 {
            return Err(BadDictionaryKey);
        }
        let (key, offset): (Bencode, usize) = match as_bstr(&bytes[i..]) {
            Ok(val) => val,
            Err(benc_err) => return Err(benc_err),
        };
        i += offset;
        let (val, offset): (Bencode, usize) = match parse_child(&bytes[i..]) {
            Ok(val) => val,
            Err(benc_err) => return Err(benc_err),
        };
        i += offset;
        children.insert(key.as_bstr().unwrap().to_owned(), val);
    }
    if i >= bytes.len() {
        return Err(MissingSentinel);
    }
    Ok((Dict(children), i + 1))
}

/// Scans `bytes` from `offset` looking for `seek_byte`, verifying along the way that every byte
/// before it is an ASCII digit (`0`-`9`).
///
/// * `bytes` - the bytes to iterate over.
/// * `seek_byte` - the byte whose index is returned.
/// * `offset` - where to start in `bytes`.
///
/// Returns `Ok(index)` where `index` is the position of `seek_byte` in `bytes` (not relative to
/// `offset`).
///
/// # Errors
///
/// * `InvalidASCIIBytes` - a byte >= 128 (other than `seek_byte`) was found before `seek_byte`.
/// * `InvalidIntegerString` - any other byte that is neither a digit nor `seek_byte` was found.
/// * `MissingSentinel` - the input ended before `seek_byte` was found, or `offset` lies beyond
///   the end of `bytes`.
fn ascii_integer(bytes: &[u8], seek_byte: u8, offset: usize) -> std::result::Result<usize, Error> {
    // `get` instead of `bytes[offset..]`: an out-of-range offset means there is nothing to scan,
    // which is the same situation as never finding the sentinel.
    let tail = bytes.get(offset..).ok_or(MissingSentinel)?;

    for (i, &b) in tail.iter().enumerate() {
        match b {
            b'0'..=b'9' => {} // [0-9] keep parsing
            x if x == seek_byte => return Ok(i + offset),
            128..=u8::MAX => return Err(InvalidASCIIBytes), // TODO: does Bencode support extended ASCII?
            _ => return Err(InvalidIntegerString), // any other byte is not part of an integer
        }
    }

    Err(MissingSentinel)
}