#![deny(
missing_docs,
future_incompatible,
nonstandard_style,
rust_2018_idioms,
missing_copy_implementations,
trivial_casts,
trivial_numeric_casts,
unsafe_code,
unused_qualifications
)]
#![deny(clippy::correctness, clippy::style, clippy::perf)]
mod iterators;
mod parse_int;
mod stack_frame;
mod token;
use memchr::memchr;
pub use iterators::{BencodeDictIter, BencodeListIter};
use parse_int::{check_integer, decode_int, is_numeric};
use stack_frame::{StackFrame, StackFrameState};
use token::{Token, TokenType};
use std::cell::Cell;
use std::convert::TryFrom;
use std::convert::TryInto;
use std::fmt;
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum BdecodeError {
ExpectedDigit,
ExpectedColon,
UnexpectedEof,
ExpectedValue,
DepthExceeded,
LimitExceeded,
Overflow,
LeadingZero,
NegativeZero,
}
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum NodeType {
Dict,
List,
Str,
Int,
}
#[derive(Clone)]
pub struct Bencode<'a> {
buf: &'a [u8],
tokens: Vec<Token>,
}
impl<'a> fmt::Debug for Bencode<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Bencode")
.field("content", &self.get_root())
.finish()
}
}
impl<'a> Bencode<'a> {
pub fn get_root<'t>(&'t self) -> BencodeAny<'a, 't> {
BencodeAny {
buf: self.buf,
root_tokens: &self.tokens,
token_idx: 0,
cached_lookup: Cell::new(None),
size: Cell::new(None),
}
}
}
#[derive(Clone)]
pub struct BencodeList<'a, 't> {
buf: &'a [u8],
root_tokens: &'t [Token],
token_idx: usize,
cached_lookup: Cell<Option<(usize, usize)>>,
cached_size: Cell<Option<usize>>,
}
impl<'a, 't> BencodeList<'a, 't> {
pub fn get(&self, index: usize) -> Option<BencodeAny<'a, 't>> {
let mut token = self.token_idx + 1;
let mut item = 0;
if self.root_tokens[token].token_type() == TokenType::End {
self.cached_size.set(Some(item));
return None;
}
let lookup = self.cached_lookup.get();
if let Some((last_token, last_index)) = lookup {
if last_index >= index {
token = last_token;
item = last_index;
}
}
while item < index {
token += self.root_tokens[token].next_item();
item += 1;
if self.root_tokens[token].token_type() == TokenType::End {
self.cached_size.set(Some(item));
return None;
}
}
if index > 0 {
self.cached_lookup.set(Some((token, index)));
}
Some(self.create_any(token))
}
pub fn len(&self) -> usize {
if let Some(size) = self.cached_size.get() {
return size;
}
let mut token = self.token_idx + 1;
let mut size = 0;
if let Some((last_token, last_index)) = self.cached_lookup.get() {
token = last_token;
size = last_index;
}
while self.root_tokens[token].token_type() != TokenType::End {
token += self.root_tokens[token].next_item();
size += 1;
}
self.cached_size.set(Some(size));
size
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn iter(&self) -> BencodeListIter<'a, 't> {
BencodeListIter::new(
self.buf,
self.root_tokens,
self.token_idx + 1,
self.cached_size.get().map(|size| size as u32),
)
}
fn create_any(&self, token_idx: usize) -> BencodeAny<'a, 't> {
BencodeAny {
buf: self.buf,
root_tokens: self.root_tokens,
token_idx,
cached_lookup: Cell::new(None),
size: Cell::new(None),
}
}
}
impl<'a, 't> fmt::Debug for BencodeList<'a, 't> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list().entries(self.iter()).finish()
}
}
#[derive(Clone)]
pub struct BencodeDict<'a, 't> {
buf: &'a [u8],
root_tokens: &'t [Token],
token_idx: usize,
cached_lookup: Cell<Option<(usize, usize)>>,
cached_size: Cell<Option<usize>>,
}
impl<'a, 't> BencodeDict<'a, 't> {
pub fn get(&self, index: usize) -> Option<(&'a [u8], BencodeAny<'a, 't>)> {
let mut token = self.token_idx + 1;
let mut item = 0;
if self.root_tokens[token].token_type() == TokenType::End {
self.cached_size.set(Some(item));
return None;
}
if let Some((last_token, last_index)) = self.cached_lookup.get() {
if last_index >= index {
token = last_token;
item = last_index;
}
}
while item < index {
assert_eq!(self.root_tokens[token].token_type(), TokenType::Str);
token += self.root_tokens[token].next_item();
if self.root_tokens[token].token_type() == TokenType::End {
self.cached_size.set(Some(item));
return None;
}
token += self.root_tokens[token].next_item();
if self.root_tokens[token].token_type() == TokenType::End {
self.cached_size.set(Some(item));
return None;
}
item += 1;
}
if index > 0 {
self.cached_lookup.set(Some((token, index)));
}
let key_node = self.create_any(token);
let key = key_node.as_string().unwrap().as_bytes();
let value_token = token + self.root_tokens[token].next_item();
let value_node = self.create_any(value_token);
Some((key, value_node))
}
pub fn find(&self, key: &[u8]) -> Option<BencodeAny<'a, 't>> {
let mut token = self.token_idx + 1;
while self.root_tokens[token].token_type() != TokenType::End {
let t = &self.root_tokens[token];
assert_eq!(t.token_type(), TokenType::Str);
let t_off = t.offset();
let t_off_start = t.start_offset();
let t_next = &self.root_tokens[token + 1];
let t_next_off = t_next.offset();
let size = t_next_off - t_off - t_off_start;
if (size == key.len())
&& (key == &self.buf[(t_off + t_off_start)..(t_off + t_off_start + size)])
{
token += t.next_item();
assert_ne!(self.root_tokens[token].token_type(), TokenType::End);
return Some(BencodeAny {
buf: self.buf,
root_tokens: self.root_tokens,
token_idx: token,
cached_lookup: Cell::new(None),
size: Cell::new(None),
});
}
token += t.next_item();
assert_ne!(self.root_tokens[token].token_type(), TokenType::End);
token += self.root_tokens[token].next_item();
}
None
}
pub fn len(&self) -> usize {
if let Some(size) = self.cached_size.get() {
return size;
}
let mut token = self.token_idx + 1;
let mut item = 0;
if let Some((last_token, last_index)) = self.cached_lookup.get() {
token = last_token;
item = last_index * 2;
}
while self.root_tokens[token].token_type() != TokenType::End {
token += self.root_tokens[token].next_item();
item += 1;
}
assert_eq!(item % 2, 0);
let size = item / 2;
self.cached_size.set(Some(size));
size
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn iter(&self) -> BencodeDictIter<'a, 't> {
BencodeDictIter::new(
self.buf,
self.root_tokens,
self.token_idx + 1,
self.cached_size.get().map(|size| size as u32),
)
}
fn create_any(&self, token_idx: usize) -> BencodeAny<'a, 't> {
BencodeAny {
buf: self.buf,
root_tokens: self.root_tokens,
token_idx,
cached_lookup: Cell::new(None),
size: Cell::new(None),
}
}
}
impl<'a, 't> fmt::Debug for BencodeDict<'a, 't> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_map().entries(self.iter()).finish()
}
}
#[derive(Clone)]
pub struct BencodeInt<'a, 't> {
buf: &'a [u8],
root_tokens: &'t [Token],
token_idx: usize,
}
impl<'a, 't> BencodeInt<'a, 't> {
pub fn as_bytes(&self) -> &'a [u8] {
let t = &self.root_tokens[self.token_idx];
let t_off = t.offset();
debug_assert_eq!(self.buf[t_off], b'i');
let t_next = &self.root_tokens[self.token_idx + 1];
let t_next_off = t_next.offset();
debug_assert_eq!(self.buf[t_next_off - 1], b'e');
let size = t_next_off - 2 - t_off;
let int_start = t_off + 1;
&self.buf[int_start..(int_start + size)]
}
pub fn as_str(&self) -> &'a str {
std::str::from_utf8(self.as_bytes()).unwrap()
}
pub fn as_i8(&self) -> Result<i8, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_i16(&self) -> Result<i16, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_i32(&self) -> Result<i32, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_i64(&self) -> Result<i64, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_i128(&self) -> Result<i128, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_isize(&self) -> Result<isize, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_u8(&self) -> Result<u8, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_u16(&self) -> Result<u16, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_u32(&self) -> Result<u32, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_u64(&self) -> Result<u64, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_u128(&self) -> Result<u128, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
pub fn as_usize(&self) -> Result<usize, BdecodeError> {
Ok(TryFrom::try_from(self)?)
}
}
impl<'a, 't> fmt::Debug for BencodeInt<'a, 't> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}
macro_rules! impl_tryfrom_bencodeint {
($int_type:ty) => {
impl<'a, 't> TryFrom<&BencodeInt<'a, 't>> for $int_type {
type Error = BdecodeError;
fn try_from(bencode_int: &BencodeInt<'a, 't>) -> Result<Self, Self::Error> {
<$int_type>::from_str_radix(bencode_int.as_str(), 10)
.map_err(|_| BdecodeError::Overflow)
}
}
};
}
impl_tryfrom_bencodeint!(i8);
impl_tryfrom_bencodeint!(i16);
impl_tryfrom_bencodeint!(i32);
impl_tryfrom_bencodeint!(i64);
impl_tryfrom_bencodeint!(i128);
impl_tryfrom_bencodeint!(isize);
impl_tryfrom_bencodeint!(u8);
impl_tryfrom_bencodeint!(u16);
impl_tryfrom_bencodeint!(u32);
impl_tryfrom_bencodeint!(u64);
impl_tryfrom_bencodeint!(u128);
impl_tryfrom_bencodeint!(usize);
#[derive(Clone)]
pub struct BencodeString<'a, 't> {
buf: &'a [u8],
root_tokens: &'t [Token],
token_idx: usize,
}
impl<'a, 't> BencodeString<'a, 't> {
pub fn as_bytes(&self) -> &'a [u8] {
let t = &self.root_tokens[self.token_idx];
let t_off = t.offset();
let t_off_start = t.start_offset();
let t_next = &self.root_tokens[self.token_idx + 1];
let t_next_off = t_next.offset();
let size = t_next_off - t_off - t_off_start;
&self.buf[(t_off + t_off_start)..(t_off + t_off_start + size)]
}
}
impl<'a, 't> fmt::Debug for BencodeString<'a, 't> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_fmt(format_args!("BencodeString({:?})", self.as_bytes()))
}
}
#[derive(Clone)]
pub struct BencodeAny<'a, 't> {
buf: &'a [u8],
root_tokens: &'t [Token],
token_idx: usize,
cached_lookup: Cell<Option<(usize, usize)>>,
size: Cell<Option<usize>>,
}
impl<'a, 't> fmt::Debug for BencodeAny<'a, 't> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.node_type() {
NodeType::Dict => {
let self_dict = self.as_dict().unwrap();
self_dict.fmt(f)
}
NodeType::List => {
let self_list = self.as_list().unwrap();
self_list.fmt(f)
}
NodeType::Int => {
let self_int = self.as_int().unwrap();
self_int.fmt(f)
}
NodeType::Str => {
let self_str = self.as_string().unwrap();
self_str.fmt(f)
}
}
}
}
impl<'a, 't> BencodeAny<'a, 't> {
pub fn node_type(&self) -> NodeType {
let token_type = self.root_tokens[self.token_idx].token_type();
match token_type {
TokenType::Dict => NodeType::Dict,
TokenType::List => NodeType::List,
TokenType::Int => NodeType::Int,
TokenType::Str => NodeType::Str,
_ => unreachable!("{:?} unexpected", token_type),
}
}
pub fn as_list(&self) -> Option<BencodeList<'a, 't>> {
if self.node_type() != NodeType::List {
return None;
}
Some(BencodeList {
buf: self.buf,
root_tokens: self.root_tokens,
token_idx: self.token_idx,
cached_lookup: Cell::new(None),
cached_size: Cell::new(None),
})
}
pub fn as_dict(&self) -> Option<BencodeDict<'a, 't>> {
if self.node_type() != NodeType::Dict {
return None;
}
Some(BencodeDict {
buf: self.buf,
root_tokens: self.root_tokens,
token_idx: self.token_idx,
cached_lookup: Cell::new(None),
cached_size: Cell::new(None),
})
}
pub fn as_int(&self) -> Option<BencodeInt<'a, 't>> {
if self.node_type() != NodeType::Int {
return None;
}
Some(BencodeInt {
buf: self.buf,
root_tokens: self.root_tokens,
token_idx: self.token_idx,
})
}
pub fn as_string(&self) -> Option<BencodeString<'a, 't>> {
if self.node_type() != NodeType::Str {
return None;
}
Some(BencodeString {
buf: self.buf,
root_tokens: self.root_tokens,
token_idx: self.token_idx,
})
}
}
pub fn bdecode(buf: &[u8]) -> Result<Bencode<'_>, BdecodeError> {
if buf.len() > Token::MAX_OFFSET {
return Err(BdecodeError::LimitExceeded);
}
if buf.is_empty() {
return Err(BdecodeError::UnexpectedEof);
}
let mut sp: usize = 0;
let mut stack: Vec<StackFrame> = Vec::with_capacity(4);
let mut tokens: Vec<Token> = Vec::with_capacity(16);
let mut off = 0;
while off < buf.len() {
let byte = buf[off];
let current_frame = sp;
if (current_frame > 0)
&& tokens[stack[current_frame - 1].token()].token_type() == TokenType::Dict
&& stack[current_frame - 1].state() == StackFrameState::Key
{
if !is_numeric(byte) && byte != b'e' {
return Err(BdecodeError::ExpectedDigit);
}
}
match byte {
b'd' => {
let new_frame =
StackFrame::new(tokens.len().try_into().unwrap(), StackFrameState::Key);
stack.push(new_frame);
sp += 1;
let new_token = Token::new(off, TokenType::Dict, 0, 0)?;
tokens.push(new_token);
off += 1;
}
b'l' => {
let new_frame =
StackFrame::new(tokens.len().try_into().unwrap(), StackFrameState::Key);
stack.push(new_frame);
sp += 1;
let new_token = Token::new(off, TokenType::List, 0, 0)?;
tokens.push(new_token);
off += 1;
}
b'i' => {
let end_index = match memchr(b'e', &buf[off..]) {
Some(idx) => off + idx,
None => {
return Err(BdecodeError::UnexpectedEof);
}
};
check_integer(&buf[(off + 1)..end_index])?;
let new_token = Token::new(off, TokenType::Int, 1, 1)?;
tokens.push(new_token);
debug_assert_eq!(buf[end_index], b'e');
off = end_index + 1;
}
b'e' => {
if sp == 0 {
return Err(BdecodeError::UnexpectedEof);
}
if sp > 0
&& (tokens[stack[sp - 1].token()].token_type() == TokenType::Dict)
&& stack[sp - 1].state() == StackFrameState::Value
{
return Err(BdecodeError::ExpectedValue);
}
let end_token = Token::new(off, TokenType::End, 1, 0)?;
tokens.push(end_token);
let top = stack[sp - 1].token();
let next_item = tokens.len() - top;
tokens[top].set_next_item(next_item)?;
debug_assert!(sp > 0);
sp -= 1;
off += 1;
}
_ => {
let str_off = off;
let colon_index = match memchr(b':', &buf[off..]) {
Some(idx) => off + idx,
None => {
return Err(BdecodeError::ExpectedColon);
}
};
debug_assert_eq!(buf[colon_index], b':');
let int_buf = &buf[off..colon_index];
check_integer(int_buf)?;
let string_length: usize = decode_int(int_buf)?
.try_into()
.map_err(|_| BdecodeError::Overflow)?;
off = colon_index + 1;
if off >= buf.len() {
return Err(BdecodeError::UnexpectedEof);
}
let remaining = buf.len() - off;
if string_length > remaining {
return Err(BdecodeError::UnexpectedEof);
}
let header_len = off - str_off - 2;
let new_token = Token::new(str_off, TokenType::Str, 1, header_len)?;
tokens.push(new_token);
off += string_length;
}
};
if current_frame > 0
&& tokens[stack[current_frame - 1].token()].token_type() == TokenType::Dict
{
stack[current_frame - 1].toggle_state();
}
if sp < current_frame {
stack.pop();
}
if sp == 0 {
break;
}
}
if sp > 0 {
return Err(BdecodeError::UnexpectedEof);
}
tokens.push(Token::new(off, TokenType::End, 0, 0)?);
Ok(Bencode { buf, tokens })
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_dict_list_no_end() {
let result_dict = bdecode(b"d");
assert!(result_dict.is_err());
let result_list = bdecode(b"l");
assert!(result_list.is_err());
}
#[test]
fn test_index_empty_dict() {
let bencode = bdecode(b"de").unwrap();
let dict_node = bencode.get_root();
assert_eq!(dict_node.node_type(), NodeType::Dict);
assert!(dict_node.as_dict().unwrap().get(0).is_none());
assert!(dict_node.as_dict().unwrap().find(b"my_key").is_none());
}
#[test]
fn test_index_empty_list() {
let bencode = bdecode(b"le").unwrap();
let list_node = bencode.get_root();
assert_eq!(list_node.node_type(), NodeType::List);
assert!(list_node.as_list().unwrap().get(0).is_none());
}
#[test]
fn test_list_1() {
let bencode = bdecode(b"l4:spami42ee").unwrap();
let root_node = bencode.get_root();
assert_eq!(root_node.node_type(), NodeType::List);
assert_eq!(root_node.as_list().unwrap().len(), 2);
let elem_0 = root_node.as_list().unwrap().get(0).unwrap();
assert_eq!(elem_0.node_type(), NodeType::Str);
assert_eq!(elem_0.as_string().unwrap().as_bytes(), b"spam");
let elem_1 = root_node.as_list().unwrap().get(1).unwrap();
assert_eq!(elem_1.node_type(), NodeType::Int);
assert!(root_node.as_list().unwrap().get(2).is_none());
}
#[test]
fn test_dict_1() {
let bencode = bdecode(b"d1:ad1:bi1e1:c4:abcde1:di3ee").unwrap();
let root_node = bencode.get_root();
assert_eq!(root_node.node_type(), NodeType::Dict);
assert_eq!(root_node.as_dict().unwrap().len(), 2);
let (key0, value0) = root_node.as_dict().unwrap().get(0).unwrap();
assert_eq!(key0, b"a");
assert_eq!(value0.node_type(), NodeType::Dict);
assert_eq!(value0.as_dict().unwrap().len(), 2);
let (key00, value00) = value0.as_dict().unwrap().get(0).unwrap();
assert_eq!(key00, b"b");
assert_eq!(value00.node_type(), NodeType::Int);
assert_eq!(value00.as_int().unwrap().as_i64().unwrap(), 1);
let (key01, value01) = value0.as_dict().unwrap().get(1).unwrap();
assert_eq!(key01, b"c");
assert_eq!(value01.node_type(), NodeType::Str);
assert_eq!(value01.as_string().unwrap().as_bytes(), b"abcd");
let (key1, value1) = root_node.as_dict().unwrap().get(1).unwrap();
assert_eq!(key1, b"d");
assert_eq!(value1.node_type(), NodeType::Int);
assert_eq!(value1.as_int().unwrap().as_i64().unwrap(), 3);
}
#[test]
fn test_list_size() {
for x in 0..100 {
let mut bencode_buf = "l".to_string();
for y in 0..x {
bencode_buf += &format!("li{}ee", y);
}
bencode_buf += "e";
let bencode = bdecode(bencode_buf.as_bytes()).unwrap();
let root_node = bencode.get_root();
println!("{:?}", root_node);
assert_eq!(root_node.as_list().unwrap().len(), x)
}
}
#[test]
fn test_bencode_int_as_type() {
let buf = b"i42e";
let bencode = bdecode(buf).unwrap();
let root_node = bencode.get_root();
let bencode_int = root_node.as_int().unwrap();
assert_eq!(bencode_int.as_i8().unwrap(), 42);
assert_eq!(bencode_int.as_i16().unwrap(), 42);
assert_eq!(bencode_int.as_i32().unwrap(), 42);
assert_eq!(bencode_int.as_i64().unwrap(), 42);
assert_eq!(bencode_int.as_i128().unwrap(), 42);
assert_eq!(bencode_int.as_isize().unwrap(), 42);
assert_eq!(bencode_int.as_u8().unwrap(), 42);
assert_eq!(bencode_int.as_u16().unwrap(), 42);
assert_eq!(bencode_int.as_u32().unwrap(), 42);
assert_eq!(bencode_int.as_u64().unwrap(), 42);
assert_eq!(bencode_int.as_u128().unwrap(), 42);
assert_eq!(bencode_int.as_usize().unwrap(), 42);
}
}