#![feature(box_patterns)]
#![feature(vec_resize_default)]
extern crate bincode; extern crate binjs_shared;
extern crate brotli;
extern crate clap;
#[macro_use]
extern crate derive_more;
extern crate flate2;
extern crate itertools;
extern crate lzw;
#[macro_use]
extern crate log;
extern crate rand;
extern crate range_encoding;
#[macro_use]
extern crate serde_derive;
extern crate serde;
extern crate vec_map;
extern crate xml as xml_rs;
use std::fmt::{ Debug, Formatter };
use std::cell::RefCell;
use std::rc::Rc;
use rand::Rng;
use rand::distributions::{ Distribution, Standard };
use rand::seq::SliceRandom;
pub use bytes::compress::Compression;
/// Errors reported while writing a token stream.
///
/// NOTE(review): the variants are constructed outside this file, so the
/// per-variant notes below are inferred from names and payload types only.
#[derive(Debug)]
pub enum TokenWriterError {
    /// An offset field was invalid; no further detail is attached.
    InvalidOffsetField,

    /// Something was not found in a dictionary. The `String` payload
    /// presumably describes the missing entry.
    NotInDictionary(String),

    /// Wraps an I/O error.
    WriteError(std::io::Error),
}
/// Errors reported while reading a token stream.
///
/// NOTE(review): apart from `InvalidValue` (see
/// `TokenReaderError::invalid_value`), the variants are constructed outside
/// this file. The per-variant notes are inferred from names and payload types
/// and should be confirmed against the readers.
#[derive(Debug)]
pub enum TokenReaderError {
    /// Something was not found in a dictionary. The `String` payload
    /// presumably describes the missing entry.
    NotInDictionary(String),

    /// Wraps an I/O error.
    ReadError(std::io::Error),

    /// A length did not match: `expected` versus `got`.
    BadLength { expected: usize, got: usize },

    /// A header was rejected.
    BadHeader,

    /// Wraps an I/O error, presumably raised while handling compressed data.
    BadCompression(std::io::Error),

    /// An end offset did not match.
    EndOffsetError {
        start: u64,
        expected: u64,
        found: u64,
        description: String,
    },

    /// A string index was rejected; carries the index.
    BadStringIndex(u32),

    /// A value was rejected; no further detail is attached.
    InvalidValue,

    /// A kind index was rejected; carries the index.
    BadKindIndex(u32),

    /// Wraps a UTF-8 conversion error.
    Encoding(std::string::FromUtf8Error),

    EmptyNodeName,
    EmptyFieldName,
    EmptyVariant,
    EmptyBool,
    EmptyString,
    EmptyList,

    /// An enum variant was rejected.
    BadEnumVariant,
}
impl TokenReaderError {
    /// Builds a `TokenReaderError::InvalidValue`, logging the offending value.
    ///
    /// The `Debug` rendering of `value` is emitted through `error!` with the
    /// log target `"token_reader"`. The value itself is not stored in the
    /// returned error.
    pub fn invalid_value<T: std::fmt::Debug>(value: &T) -> Self {
        let failure = TokenReaderError::InvalidValue;
        error!(target: "token_reader", "InvalidValue {:?}", value);
        failure
    }
}
pub mod bytes;
#[macro_use]
pub mod io;
pub use io::*;
pub mod simple;
pub mod multipart;
pub mod entropy;
pub mod xml;
mod util;
mod escaped_wtf8;
/// Name of the clap subcommand under which `Format::subcommand` registers the
/// per-format subcommands, and which `Format::from_matches` looks up.
const ADVANCED_COMMAND: &str = "advanced";
/// Where a dictionary is placed.
///
/// NOTE(review): this type is not used anywhere in this file; the meaning of
/// each variant is inferred from its name only.
#[derive(Clone, Debug)]
pub enum DictionaryPlacement {
    /// Presumably: the dictionary is stored in a header.
    Header,

    /// Presumably: the dictionary is stored inline.
    Inline,
}
/// Internal state of a `CompressionTarget`.
#[derive(Clone, Debug)]
enum Compressing {
    /// Still accepting writes: the bytes accumulated so far.
    ///
    /// The buffer sits behind `Rc<RefCell<..>>`, so cloning this state shares
    /// the buffer rather than copying it.
    Uncompressed(Rc<RefCell<Vec<u8>>>),

    /// Closed by `CompressionTarget::done`: the output of the compressor and
    /// the result it reported.
    Compressed {
        data: Rc<Vec<u8>>,
        result: bytes::compress::CompressionResult,
    },
}
/// An in-memory byte sink that is compressed on demand.
///
/// Bytes are appended through the `std::io::Write` implementation, and
/// `done()` compresses them with `format`. Cloning is shallow: while the
/// target is still uncompressed, clones share the same underlying buffer.
#[derive(Clone)]
pub struct CompressionTarget {
    /// Either the bytes written so far or the compressed output.
    data: Compressing,

    /// Compression applied by `done()`.
    format: bytes::compress::Compression,
}
/// Debug output shows only the compression format, never the buffered bytes.
impl Debug for CompressionTarget {
    fn fmt(&self, f: &mut Formatter) -> Result<(), std::fmt::Error> {
        Debug::fmt(&self.format, f)
    }
}
impl CompressionTarget {
pub fn new(format: bytes::compress::Compression) -> Self {
Self {
data: Compressing::Uncompressed(Rc::new(RefCell::new(vec![]))),
format,
}
}
pub fn done(&mut self) -> std::result::Result<(Rc<Vec<u8>>, bytes::compress::CompressionResult), std::io::Error> {
let (data, result) = match self.data {
Compressing::Compressed { ref result, ref data } => return Ok((data.clone(), result.clone())),
Compressing::Uncompressed(ref data) => {
let mut buf = vec![];
let result = self.format.compress(&data.borrow().as_ref(), &mut buf)?;
(Rc::new(buf), result)
}
};
self.data = Compressing::Compressed {
result: result.clone(),
data: data.clone(),
};
Ok((data, result))
}
pub fn reset(&mut self) {
self.data = Compressing::Uncompressed(Rc::new(RefCell::new(vec![])));
}
pub fn len(&self) -> usize {
match self.data {
Compressing::Uncompressed(ref data) => data.borrow().len(),
Compressing::Compressed { ref result, .. } => result.before_bytes,
}
}
}
impl Distribution<CompressionTarget> for Standard {
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> CompressionTarget {
CompressionTarget::new(rng.gen())
}
}
impl std::io::Write for CompressionTarget {
    /// Appends all of `data` to the in-memory buffer and reports it as fully
    /// written.
    ///
    /// # Panics
    ///
    /// Panics if the target has already been closed by `done()`.
    fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
        if let Compressing::Uncompressed(ref buf) = self.data {
            buf.borrow_mut().extend_from_slice(data);
            return Ok(data.len());
        }
        panic!("Attempting to add data to a CompressionTarget that is already closed")
    }

    /// Does nothing: the bytes already live in memory.
    fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
        Ok(())
    }
}
impl Default for CompressionTarget {
fn default() -> Self {
Self::new(bytes::compress::Compression::Identity)
}
}
/// Command-line integration for one output format.
///
/// `Format::providers` lists one implementation per format module
/// (`multipart`, `simple`, `xml`, `entropy`).
pub trait FormatProvider {
    /// The clap subcommand describing this format's options.
    ///
    /// `Format::from_matches` uses the subcommand's name to decide which
    /// provider was selected on the command line.
    fn subcommand<'a, 'b>(&self) -> clap::App<'a, 'b>;

    /// Builds the `Format` from parsed arguments.
    ///
    /// `matches` is `Some` when this provider's subcommand was given on the
    /// command line. It is `None` when the provider is used as the default
    /// with no arguments of its own.
    fn handle_subcommand(&self, matches: Option<&clap::ArgMatches>) -> Result<::Format, ::std::io::Error>;
}
/// The output formats known to this crate, with their options.
pub enum Format {
    /// The format handled by the `simple` module; no options.
    Simple,

    /// The format handled by the `multipart` module.
    Multipart {
        /// Compression targets for the grammar, strings and tree sections.
        targets: multipart::Targets,

        /// Shared, mutable statistics.
        stats: Rc<RefCell<multipart::Statistics>>,
    },

    /// The format handled by the `xml` module; no options.
    XML,

    /// The format handled by the `entropy` module.
    Entropy {
        options: entropy::Options,
    },
}
// Random generation of a `Format`.
//
// Only `Simple`, `Multipart` and `XML` can be produced: none of the generators
// below builds a `Format::Entropy`.
impl Distribution<Format> for Standard {
fn sample<'a, R: Rng + ?Sized>(&self, rng: &'a mut R) -> Format {
// One closure per format that can be generated. Each receives the RNG so that
// it can randomize its own options. The cast on the first entry gives the
// array its element type.
let generators = [
Rc::new(|_| Format::simple()) as Rc<Fn(&'a mut R) -> Format>,
Rc::new(|rng| {
use multipart::{ Statistics, Targets };
// Fresh statistics; presumably `with_source_bytes(0)` records a source size
// of zero bytes -- TODO confirm against `multipart::Statistics`.
let stats = Rc::new(RefCell::new(Statistics::default()
.with_source_bytes(0)));
Format::Multipart {
// Each section gets its own randomly sampled `CompressionTarget`.
targets: Targets {
strings_table: rng.gen(),
grammar_table: rng.gen(),
tree: rng.gen(),
},
stats
}
}),
Rc::new(|_| Format::XML),
];
// Pick one generator at random. The `Rc` is cloned so that the borrow of
// `generators` ends before the chosen closure is invoked with the RNG.
// `choose` is expected to return `None` only for an empty slice, so with three
// entries the `unwrap` should not fail.
let pick : Rc<Fn(&'a mut R) -> Format> = generators.choose(rng)
.map(Rc::clone)
.unwrap(); pick(rng)
}
}
impl Format {
pub fn simple() -> Self {
Format::Simple
}
pub fn randomize_options<R: rand::Rng>(self, rng: &mut R) -> Self {
match self {
Format::Simple => Format::Simple,
Format::XML => Format::XML,
Format::Multipart { stats, .. } =>
Format::Multipart {
targets: multipart::Targets {
strings_table: rng.gen(),
grammar_table: rng.gen(),
tree: rng.gen(),
},
stats
}
,
Format::Entropy { .. } => unimplemented!()
}
}
pub fn name(&self) -> String {
match *self {
Format::Simple { .. } => "Simple".to_string(),
Format::Multipart { .. } => "Multipart".to_string(),
Format::XML => "XML".to_string(),
Format::Entropy { .. } => "Entropy".to_string(),
}
}
pub fn with_sections<F, E>(&mut self, mut f: F) -> Result<(), E> where F: FnMut(&mut CompressionTarget, &str) -> Result<(), E> {
match *self {
Format::Simple { .. } |
Format::XML => {
Ok(())
}
Format::Entropy { ..} => {
Ok(())
}
Format::Multipart {
targets: multipart::Targets {
ref mut grammar_table,
ref mut strings_table,
ref mut tree
},
..
} => {
f(grammar_table, "grammar")?;
f(strings_table, "strings")?;
f(tree, "tree")?;
Ok(())
}
}
}
fn providers() -> [&'static FormatProvider; 4] {
[
&multipart::FormatProvider,
&simple::FormatProvider,
&xml::FormatProvider,
&entropy::FormatProvider,
]
}
fn default_provider() -> &'static FormatProvider {
&multipart::FormatProvider
}
pub fn subcommand<'a, 'b>() -> clap::App<'a, 'b> {
clap::SubCommand::with_name(ADVANCED_COMMAND)
.subcommands(Format::providers().iter()
.map(|x| x.subcommand())
)
}
pub fn from_matches(matches: &clap::ArgMatches) -> Result<Self, std::io::Error> {
if let Some(matches) = matches.subcommand_matches(ADVANCED_COMMAND) {
for provider in Self::providers().into_iter() {
let subcommand = provider.subcommand();
let key = subcommand.get_name();
if let Some(matches) = matches.subcommand_matches(key) {
return provider.handle_subcommand(Some(matches));
}
}
}
Self::default_provider()
.handle_subcommand(None)
}
}