use std::ops::{Range, RangeInclusive};
use encoder::{GenericEncoderConfig, LargerTrgtNaiveTests};
use op_maker::translate_inner_ops;
use smdiff_common::{AddOp, Copy, CopySrc, Format, Run, MAX_INST_SIZE, MAX_WIN_SIZE};
use smdiff_writer::make_sections;
pub use src_matcher::SrcMatcherConfig;
pub use trgt_matcher::TrgtMatcherConfig;
use writer::section_writer;
mod hasher;
mod hashmap;
mod trgt_matcher;
mod src_matcher;
mod op_maker;
mod encoder;
pub mod writer;
pub mod zstd{
pub use zstd::stream::Encoder;
}
pub mod brotli {
pub use brotlic::{encode::{BrotliEncoderOptions,CompressorWriter},BlockSize,CompressionMode,Quality,WindowSize};
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
struct Add <'a> {
bytes: &'a [u8],
}
impl AddOp for Add<'_> {
fn bytes(&self) -> &[u8] {
&self.bytes
}
}
type Op<'a> = smdiff_common::Op<Add<'a>>;
#[derive(Clone, Debug)]
pub enum SecondaryCompression {
Smdiff(TrgtMatcherConfig),
Zstd{level:i32},
Brotli{options: ::brotlic::BrotliEncoderOptions},
}
impl SecondaryCompression {
pub fn new_smdiff_default() -> Self {
SecondaryCompression::Smdiff (TrgtMatcherConfig::comp_level(3))
}
pub fn new_zstd_default() -> Self {
SecondaryCompression::Zstd { level: 3 }
}
pub fn new_brotli_default() -> Self {
SecondaryCompression::Brotli { options: ::brotlic::BrotliEncoderOptions::default() }
}
pub fn algo_value(&self) -> u8 {
match self {
SecondaryCompression::Smdiff { .. } => 1,
SecondaryCompression::Zstd { .. } => 2,
SecondaryCompression::Brotli { .. } => 3,
}
}
}
impl Default for SecondaryCompression {
fn default() -> Self {
Self::new_zstd_default()
}
}
#[derive(Clone, Debug)]
pub struct EncoderConfig {
pub match_src: Option<SrcMatcherConfig>,
pub match_trgt: Option<TrgtMatcherConfig>,
pub sec_comp: Option<SecondaryCompression>,
pub format: Format,
pub output_segment_size: usize,
pub naive_tests: Option<LargerTrgtNaiveTests>,
pub lazy_escape_len: Option<usize>,
}
impl EncoderConfig {
pub fn new() -> Self {
Self::default()
}
pub fn no_match_src(mut self) -> Self {
self.match_src = None;
self
}
pub fn no_match_target(mut self) -> Self {
self.match_trgt = None;
self
}
pub fn no_sec_comp(mut self) -> Self {
self.sec_comp = None;
self
}
pub fn set_match_src(mut self, config: SrcMatcherConfig) -> Self {
self.match_src = Some(config);
self
}
pub fn set_sec_comp(mut self, sec_comp: SecondaryCompression) -> Self {
self.sec_comp = Some(sec_comp);
self
}
pub fn format_interleaved(mut self) -> Self {
self.format = Format::Interleaved;
self
}
pub fn format_segregated(mut self) -> Self {
self.format = Format::Segregated;
self
}
pub fn set_match_target(mut self, config: TrgtMatcherConfig) -> Self {
self.match_trgt = Some(config);
self
}
pub fn set_output_segment_size(mut self, size: usize) -> Self {
self.output_segment_size = size;
self
}
pub fn set_naive_tests(mut self, tests: LargerTrgtNaiveTests) -> Self {
self.naive_tests = Some(tests);
self
}
pub fn set_lazy_escape_len(mut self, len: usize) -> Self {
self.lazy_escape_len = Some(len);
self
}
pub fn comp_level(level: usize,match_trgt:bool,sec_comp:Option<SecondaryCompression>) -> Self {
let match_trgt = if match_trgt {
Some(TrgtMatcherConfig::comp_level(level))
}else{
None
};
let format = if sec_comp.is_some() {
Format::Segregated
}else{
Format::Interleaved
};
Self {
match_src: Some(SrcMatcherConfig::comp_level(level)),
output_segment_size: MAX_WIN_SIZE,
format,
match_trgt,
sec_comp,
naive_tests: None,
lazy_escape_len: None,
}
}
}
impl Default for EncoderConfig {
fn default() -> Self {
Self {
match_src: Some(SrcMatcherConfig::comp_level(3)),
output_segment_size: MAX_WIN_SIZE,
format: Format::Interleaved,
match_trgt: None,
sec_comp: None,
naive_tests: None,
lazy_escape_len: None,
}
}
}
pub fn encode<R: std::io::Read+std::io::Seek, W: std::io::Write>(dict: Option<&mut R>, output: &mut R, writer: &mut W,config:&EncoderConfig) -> std::io::Result<()> {
let mut src_bytes = Vec::new();
if let Some(r) = dict {
r.read_to_end(&mut src_bytes)?;
}
let mut trgt_bytes = Vec::new();
output.read_to_end(&mut trgt_bytes)?;
let src = src_bytes.as_slice();
let trgt = trgt_bytes.as_slice();
let EncoderConfig { match_src, match_trgt, sec_comp, format,output_segment_size, naive_tests, lazy_escape_len } = config.clone();
let segment_size = output_segment_size.min(MAX_WIN_SIZE).max(MAX_INST_SIZE);
let mut inner_config = GenericEncoderConfig{
match_trgt,
match_src,
lazy_escape_len,
naive_tests,
};
let segments = encoder::encode_inner(&mut inner_config, src, trgt);
let ops = translate_inner_ops(trgt, segments);
let mut cur_o_pos: usize = 0;
let mut win_data = Vec::new();
for (seg_ops,mut header) in make_sections(&ops, segment_size){
header.format = format;
debug_assert!({
let mut o = cur_o_pos;
seg_ops.iter().all(
|op| {
let len = op.oal() as usize;
let test = &trgt[o..o + len];
o += len;
match op{
Op::Add(Add { bytes }) => test == &bytes[..],
Op::Copy(Copy { src:CopySrc::Dict, addr, len }) => test == &src[*addr as usize..*addr as usize + *len as usize],
Op::Copy(Copy { src:CopySrc::Output, addr, len }) => test == &trgt[*addr as usize..*addr as usize + *len as usize],
Op::Run(Run{ byte, .. }) => test.iter().all(|b| b == byte),
}
}
)
});
cur_o_pos += header.output_size as usize;
section_writer(&sec_comp, header, writer, seg_ops, &mut win_data)?; }
Ok(())
}
struct Ranger {
input_range: Range<usize>,
output_range: RangeInclusive<usize>,
input_span: usize,
output_span: usize,
is_inverted: bool,
}
impl Ranger {
fn new(input_range: Range<usize>, output_range: RangeInclusive<usize>) -> Self {
let input_span = input_range.end - input_range.start - 1;
let is_inverted = output_range.start() > output_range.end();
let output_span = output_range.end().abs_diff(*output_range.start());
Self { input_range, output_range, input_span, output_span, is_inverted }
}
fn map(&self, input_value: usize) -> usize {
let input_value = input_value.clamp(self.input_range.start, self.input_range.end-1);
let b = self.output_range.start().min(self.output_range.end());
let m = input_value - self.input_range.start;
let output = b + ((self.output_span * m) / self.input_span);
if self.is_inverted{
self.output_span+self.output_range.end() - output + self.output_range.end()
}else{
output
}
}
}
#[cfg(test)]
mod test_super {
use super::*;
#[test]
fn test_regular_mapping() {
let input_range = 1..11;
let output_range = 1..=100;
let interpolator = Ranger::new(input_range, output_range);
assert_eq!(interpolator.map(1), 1);
assert_eq!(interpolator.map(2), 12);
assert_eq!(interpolator.map(3), 23);
assert_eq!(interpolator.map(4), 34);
assert_eq!(interpolator.map(5), 45);
assert_eq!(interpolator.map(6), 56);
assert_eq!(interpolator.map(7), 67);
assert_eq!(interpolator.map(8), 78);
assert_eq!(interpolator.map(9), 89);
assert_eq!(interpolator.map(10), 100);
}
#[test]
fn test_inverted_mapping() {
let input_range = 1..11;
let output_range = 100..=1; let interpolator = Ranger::new(input_range, output_range);
assert_eq!(interpolator.map(1), 100);
assert_eq!(interpolator.map(5), 56);
assert_eq!(interpolator.map(10), 1);
}
#[test]
fn test_out_of_range_input() {
let input_range = 3..10;
let output_range = 0..=100;
let interpolator = Ranger::new(input_range, output_range);
assert_eq!(interpolator.map(0), interpolator.map(3)); assert_eq!(interpolator.map(11), interpolator.map(10)); }
}