smdiff_encoder/
lib.rs

1
2use std::ops::{Range, RangeInclusive};
3
4use encoder::{GenericEncoderConfig, LargerTrgtNaiveTests};
5use op_maker::translate_inner_ops;
6use smdiff_common::{AddOp, Copy, CopySrc, Format, Run, MAX_INST_SIZE, MAX_WIN_SIZE};
7use smdiff_writer::make_sections;
8pub use src_matcher::SrcMatcherConfig;
9pub use trgt_matcher::TrgtMatcherConfig;
10use writer::section_writer;
11
12
13
14mod hasher;
15mod hashmap;
16mod trgt_matcher;
17mod src_matcher;
18mod op_maker;
19mod encoder;
20pub mod writer;
21
pub mod zstd{
//! Re-export of the zstd streaming encoder (`zstd::stream::Encoder`) used for
//! secondary compression, so callers can name the type through this crate.
    pub use zstd::stream::Encoder;
}
pub mod brotli {
//! Re-export of the brotli encoder (from the `brotlic` crate) used for secondary compression.
//! The encoder's configuration types (`BrotliEncoderOptions`, `BlockSize`,
//! `CompressionMode`, `Quality`, `WindowSize`) are re-exported alongside it.
    pub use brotlic::{encode::{BrotliEncoderOptions,CompressorWriter},BlockSize,CompressionMode,Quality,WindowSize};
}
31
32/// The Add operation for the encoder.
33#[derive(Copy, Clone, Debug, PartialEq, Eq)]
34struct Add <'a> {
35    bytes: &'a [u8],
36}
37impl AddOp for Add<'_> {
38    fn bytes(&self) -> &[u8] {
39        &self.bytes
40    }
41}
42
/// Encoder-local operation type: `smdiff_common::Op` specialised to the borrowing [`Add`] op.
type Op<'a> = smdiff_common::Op<Add<'a>>;
44
/// The secondary compression algorithm to use.
///
/// Default Value: Zstd { level: 3 }
#[derive(Clone, Debug)]
pub enum SecondaryCompression {
    /// Use smdiff itself as the secondary compressor, driven by a target matcher config.
    /// Default Value: TrgtMatcherConfig::comp_level(3)
    Smdiff(TrgtMatcherConfig),
    /// Use zstd at the given level.
    /// Value of 1..=22.
    /// Default Value: 3
    Zstd{level:i32},
    /// Use brotli with the given encoder options.
    /// Default Value: BrotliEncoderOptions::default()
    Brotli{options: ::brotlic::BrotliEncoderOptions},
}
57
58impl SecondaryCompression {
59    pub fn new_smdiff_default() -> Self {
60        SecondaryCompression::Smdiff (TrgtMatcherConfig::comp_level(3))
61    }
62
63    pub fn new_zstd_default() -> Self {
64        SecondaryCompression::Zstd { level: 3 }
65    }
66
67    pub fn new_brotli_default() -> Self {
68        SecondaryCompression::Brotli { options: ::brotlic::BrotliEncoderOptions::default() }
69    }
70    /// Returns the value to use in the header. Per the spec.
71    pub fn algo_value(&self) -> u8 {
72        match self {
73            SecondaryCompression::Smdiff { .. } => 1,
74            SecondaryCompression::Zstd { .. } => 2,
75            SecondaryCompression::Brotli { .. } => 3,
76        }
77    }
78}
79impl Default for SecondaryCompression {
80    fn default() -> Self {
81        Self::new_zstd_default()
82    }
83}
84
/// Configuration for the encoder.
///
/// Default values (as produced by the `Default` impl) are:
/// - match_src: Some(SrcMatcherConfig::comp_level(3))
/// - match_trgt: None
/// - sec_comp: None
/// - format: Interleaved
/// - output_segment_size: MAX_WIN_SIZE
/// - naive_tests: None
/// - lazy_escape_len: None
#[derive(Clone, Debug)]
pub struct EncoderConfig {
    /// Do we consider the src file as a dictionary to find matches?
    /// If so (Some(_)), any preferences set in the MatcherConfig will be used.
    /// Default Value: Some(SrcMatcherConfig::comp_level(3))
    pub match_src: Option<SrcMatcherConfig>,
    /// Whether to use the output file in an attempt to compress itself.
    /// If so (Some(_)), any preferences set in the MatcherConfig will be used.
    /// Default Value: None
    pub match_trgt: Option<TrgtMatcherConfig>,
    /// None for no secondary compression.
    /// Default Value: None
    pub sec_comp: Option<SecondaryCompression>,
    /// Whether to interleave or segregate the Add bytes.
    /// Default Value: Interleaved
    pub format: Format,
    /// The size of the output window.
    /// Default Value: MAX_WIN_SIZE
    /// The minimum value is MAX_INST_SIZE; `encode` clamps the configured size
    /// so that it is no larger than MAX_WIN_SIZE and no smaller than MAX_INST_SIZE.
    pub output_segment_size: usize,
    /// The types of naive tests to run.
    /// Default Value: None
    pub naive_tests: Option<LargerTrgtNaiveTests>,
    /// The length of a match that will end the lazy matching sequence.
    /// Default Value: None
    /// NOTE(review): earlier docs claimed Some(45); the `Default` impl sets None.
    /// Presumably the inner encoder applies its own fallback when None — confirm.
    pub lazy_escape_len: Option<usize>,

}
123
124impl EncoderConfig {
125    pub fn new() -> Self {
126        Self::default()
127    }
128    pub fn no_match_src(mut self) -> Self {
129        self.match_src = None;
130        self
131    }
132    pub fn no_match_target(mut self) -> Self {
133        self.match_trgt = None;
134        self
135    }
136    pub fn no_sec_comp(mut self) -> Self {
137        self.sec_comp = None;
138        self
139    }
140    pub fn set_match_src(mut self, config: SrcMatcherConfig) -> Self {
141        self.match_src = Some(config);
142        self
143    }
144    pub fn set_sec_comp(mut self, sec_comp: SecondaryCompression) -> Self {
145        self.sec_comp = Some(sec_comp);
146        self
147    }
148    pub fn format_interleaved(mut self) -> Self {
149        self.format = Format::Interleaved;
150        self
151    }
152    pub fn format_segregated(mut self) -> Self {
153        self.format = Format::Segregated;
154        self
155    }
156    pub fn set_match_target(mut self, config: TrgtMatcherConfig) -> Self {
157        self.match_trgt = Some(config);
158        self
159    }
160    pub fn set_output_segment_size(mut self, size: usize) -> Self {
161        self.output_segment_size = size;
162        self
163    }
164    pub fn set_naive_tests(mut self, tests: LargerTrgtNaiveTests) -> Self {
165        self.naive_tests = Some(tests);
166        self
167    }
168    pub fn set_lazy_escape_len(mut self, len: usize) -> Self {
169        self.lazy_escape_len = Some(len);
170        self
171    }
172    /// Use the short hand compression level.
173    /// If match_trgt is true, the same compression level will be used to set the TrgtMatcherConfig.
174    /// If secondary compression is Some(_), the format will be Segregated, else Interleaved.
175    pub fn comp_level(level: usize,match_trgt:bool,sec_comp:Option<SecondaryCompression>) -> Self {
176        let match_trgt = if match_trgt {
177            Some(TrgtMatcherConfig::comp_level(level))
178        }else{
179            None
180        };
181        let format = if sec_comp.is_some() {
182            Format::Segregated
183        }else{
184            Format::Interleaved
185        };
186        Self {
187            match_src: Some(SrcMatcherConfig::comp_level(level)),
188            output_segment_size: MAX_WIN_SIZE,
189            format,
190            match_trgt,
191            sec_comp,
192            naive_tests: None,
193            lazy_escape_len: None,
194        }
195    }
196}
197impl Default for EncoderConfig {
198    fn default() -> Self {
199        Self {
200            match_src: Some(SrcMatcherConfig::comp_level(3)),
201            output_segment_size: MAX_WIN_SIZE,
202            format: Format::Interleaved,
203            match_trgt: None,
204            sec_comp: None,
205            naive_tests: None,
206            lazy_escape_len: None,
207        }
208    }
209}
210/// Encodes a delta file based on the given configuration and inputs.
211/// # Arguments
212/// * `dict` - The source file to use as a dictionary. If None, the source file will not be used.
213/// * `output` - The target file to encode.
214/// * `writer` - The writer to write the encoded data to.
215/// * `config` - The configuration to use for the encoder.
216/// # Errors
217/// Returns an error if there was an issue reading the source or target files, or writing the encoded data.
218pub fn encode<R: std::io::Read+std::io::Seek, W: std::io::Write>(dict: Option<&mut R>, output: &mut R, writer: &mut W,config:&EncoderConfig) -> std::io::Result<()> {
219    //this simple encoder will just read all the bytes to memory.
220    let mut src_bytes = Vec::new();
221    if let Some(r) = dict {
222        r.read_to_end(&mut src_bytes)?;
223    }
224    let mut trgt_bytes = Vec::new();
225    output.read_to_end(&mut trgt_bytes)?;
226    let src = src_bytes.as_slice();
227    let trgt = trgt_bytes.as_slice();
228    let EncoderConfig { match_src, match_trgt, sec_comp, format,output_segment_size, naive_tests, lazy_escape_len } = config.clone();
229    let segment_size = output_segment_size.min(MAX_WIN_SIZE).max(MAX_INST_SIZE);
230    let mut inner_config = GenericEncoderConfig{
231        match_trgt,
232        match_src,
233        lazy_escape_len,
234        naive_tests,
235    };
236    let segments = encoder::encode_inner(&mut inner_config, src, trgt);
237    // dbg!(&inner_config);
238    let ops = translate_inner_ops(trgt, segments);
239    let mut cur_o_pos: usize = 0;
240    let mut win_data = Vec::new();
241    for (seg_ops,mut header) in make_sections(&ops, segment_size){
242        header.format = format;
243        debug_assert!({
244            let mut o = cur_o_pos;
245            seg_ops.iter().all(
246                |op| {
247                    let len = op.oal() as usize;
248                    let test = &trgt[o..o + len];
249                    o += len;
250                    match op{
251                        Op::Add(Add { bytes }) => test == &bytes[..],
252                        Op::Copy(Copy { src:CopySrc::Dict, addr, len }) => test == &src[*addr as usize..*addr as usize + *len as usize],
253                        Op::Copy(Copy { src:CopySrc::Output, addr, len }) => test == &trgt[*addr as usize..*addr as usize + *len as usize],
254                        Op::Run(Run{ byte, .. }) => test.iter().all(|b| b == byte),
255                    }
256                }
257            )
258        });
259        cur_o_pos += header.output_size as usize;
260        section_writer(&sec_comp, header, writer, seg_ops, &mut win_data)?; //write the section
261    }
262    Ok(())
263}
264
265
/// Linear integer mapper from a half-open input range onto an inclusive output range.
///
/// This just simplifies mapping a 0..9 comp_level to various ranges for various settings.
/// The output range may be "inverted" (start greater than end), in which case
/// increasing inputs produce decreasing outputs.
struct Ranger {
    /// Accepted inputs; values outside are clamped to `start..=end - 1`.
    input_range: Range<usize>,
    /// Output produced for the first input (`start()`) and the last input (`end()`).
    output_range: RangeInclusive<usize>,
    /// Number of steps between the first and last input (`len - 1`, saturating at 0).
    input_span: usize,
    /// Absolute distance between the two output endpoints.
    output_span: usize,
    /// True when the output decreases as the input increases.
    is_inverted: bool,
}

impl Ranger {
    /// Builds a mapper from `input_range` onto `output_range`.
    ///
    /// An empty or single-value input range yields an `input_span` of 0 rather
    /// than underflowing; `map` then always returns the output start.
    fn new(input_range: Range<usize>, output_range: RangeInclusive<usize>) -> Self {
        let input_span = input_range
            .end
            .saturating_sub(input_range.start)
            .saturating_sub(1);
        let is_inverted = output_range.start() > output_range.end();
        let output_span = output_range.end().abs_diff(*output_range.start());

        Self { input_range, output_range, input_span, output_span, is_inverted }
    }

    /// Maps `input_value` to its interpolated output, rounding toward the output start.
    ///
    /// Inputs outside the input range are clamped to its first or last value.
    fn map(&self, input_value: usize) -> usize {
        // With zero steps there is nothing to interpolate over, and dividing by
        // `input_span` would panic.
        if self.input_span == 0 {
            return *self.output_range.start();
        }
        // `input_span > 0` guarantees `end - 1 >= start`, so the clamp bounds are valid.
        let last_input = self.input_range.end - 1;
        let clamped = input_value.clamp(self.input_range.start, last_input);
        let steps = clamped - self.input_range.start;
        // Distance travelled from the output start; never exceeds `output_span`.
        let travelled = (self.output_span * steps) / self.input_span;
        if self.is_inverted {
            self.output_range.start() - travelled
        } else {
            self.output_range.start() + travelled
        }
    }
}
298
299
300
#[cfg(test)]
mod test_super {
    use super::*;

    /// Inputs 1 through 10 spread evenly over 1..=100.
    #[test]
    fn test_regular_mapping() {
        let interpolator = Ranger::new(1..11, 1..=100);
        let expected = [1, 12, 23, 34, 45, 56, 67, 78, 89, 100];

        for (idx, &want) in expected.iter().enumerate() {
            // The table starts at input 1.
            assert_eq!(interpolator.map(idx + 1), want);
        }
    }

    /// With an inverted output range, larger inputs give smaller outputs.
    #[test]
    fn test_inverted_mapping() {
        let interpolator = Ranger::new(1..11, 100..=1); // Inverted range
        let cases = [(1, 100), (5, 56), (10, 1)];

        for &(input, want) in cases.iter() {
            assert_eq!(interpolator.map(input), want);
        }
    }

    /// Inputs outside the input range behave like the nearest in-range input.
    #[test]
    fn test_out_of_range_input() {
        let interpolator = Ranger::new(3..10, 0..=100);

        let below = interpolator.map(0);
        let above = interpolator.map(11);
        assert_eq!(below, interpolator.map(3)); // Below range
        assert_eq!(above, interpolator.map(10)); // Above range
    }

}