1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345

use std::ops::{Range, RangeInclusive};

use encoder::{GenericEncoderConfig, LargerTrgtNaiveTests};
use op_maker::translate_inner_ops;
use smdiff_common::{AddOp, Copy, CopySrc, Format, Run, MAX_INST_SIZE, MAX_WIN_SIZE};
use smdiff_writer::make_sections;
pub use src_matcher::SrcMatcherConfig;
pub use trgt_matcher::TrgtMatcherConfig;
use writer::section_writer;



mod hasher;
mod hashmap;
mod trgt_matcher;
mod src_matcher;
mod op_maker;
mod encoder;
pub mod writer;

pub mod zstd{
//! This module is a re-export of the zstd encoder used in the secondary compression.
    pub use zstd::stream::Encoder;
}
pub mod brotli {
//! This module is a re-export of the brotli encoder used in the secondary compression.
//! It also exports the config options.
    pub use brotlic::{encode::{BrotliEncoderOptions,CompressorWriter},BlockSize,CompressionMode,Quality,WindowSize};
}

/// The Add operation for the encoder.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
struct Add <'a> {
    bytes: &'a [u8],
}
impl AddOp for Add<'_> {
    fn bytes(&self) -> &[u8] {
        &self.bytes
    }
}

type Op<'a> = smdiff_common::Op<Add<'a>>;

/// The secondary compression algorithm to use.
/// Default Value: Zstd { level: 3 }
#[derive(Clone, Debug)]
pub enum SecondaryCompression {
    /// Default Value: TrgtMatcherConfig::new_from_compression_level(3)
    Smdiff(TrgtMatcherConfig),
    /// Value of 1..=22.
    /// Default Value: 3
    Zstd{level:i32},
    /// Default Value: BrotliEncoderOptions::default()
    Brotli{options: ::brotlic::BrotliEncoderOptions},
}

impl SecondaryCompression {
    pub fn new_smdiff_default() -> Self {
        SecondaryCompression::Smdiff (TrgtMatcherConfig::comp_level(3))
    }

    pub fn new_zstd_default() -> Self {
        SecondaryCompression::Zstd { level: 3 }
    }

    pub fn new_brotli_default() -> Self {
        SecondaryCompression::Brotli { options: ::brotlic::BrotliEncoderOptions::default() }
    }
    /// Returns the value to use in the header. Per the spec.
    pub fn algo_value(&self) -> u8 {
        match self {
            SecondaryCompression::Smdiff { .. } => 1,
            SecondaryCompression::Zstd { .. } => 2,
            SecondaryCompression::Brotli { .. } => 3,
        }
    }
}
impl Default for SecondaryCompression {
    fn default() -> Self {
        Self::new_zstd_default()
    }
}

/// Configuration for the encoder.
///
/// Default values are:
/// - match_src: Some(SrcMatcherConfig::new_from_compression_level(3))
/// - match_target: None
/// - sec_comp: None
/// - format: Interleaved
/// - output_segment_size: MAX_WIN_SIZE
/// - naive_tests: None
/// - lazy_escape_len: Some(45)
#[derive(Clone, Debug)]
pub struct EncoderConfig {
    /// Do we consider the src file as a dictionary to find matches?
    /// If so (Some(_)), any preferences set in the MatcherConfig will be used.
    /// Default Value: Some(SrcMatcherConfig::new_from_compression_level(3))
    pub match_src: Option<SrcMatcherConfig>,
    /// Whether to use the output file in an attempt to compress itself.
    /// If so (Some(_)), any preferences set in the MatcherConfig will be used.
    /// Default Value: None
    pub match_trgt: Option<TrgtMatcherConfig>,
    /// None for no secondary compression.
    /// Default Value: None
    pub sec_comp: Option<SecondaryCompression>,
    /// Whether to interleave or segregate the Add bytes.
    /// Default Value: Interleaved
    pub format: Format,
    /// The size of the output window.
    /// Default Value: MAX_WIN_SIZE
    /// The minimum value is MAX_INST_SIZE.
    pub output_segment_size: usize,
    /// The types of naive tests to run.
    /// Default Value: None
    pub naive_tests: Option<LargerTrgtNaiveTests>,
    /// The length of a match that will end the lazy matching sequence.
    /// Default Value: Some(45)
    pub lazy_escape_len: Option<usize>,

}

impl EncoderConfig {
    pub fn new() -> Self {
        Self::default()
    }
    pub fn no_match_src(mut self) -> Self {
        self.match_src = None;
        self
    }
    pub fn no_match_target(mut self) -> Self {
        self.match_trgt = None;
        self
    }
    pub fn no_sec_comp(mut self) -> Self {
        self.sec_comp = None;
        self
    }
    pub fn set_match_src(mut self, config: SrcMatcherConfig) -> Self {
        self.match_src = Some(config);
        self
    }
    pub fn set_sec_comp(mut self, sec_comp: SecondaryCompression) -> Self {
        self.sec_comp = Some(sec_comp);
        self
    }
    pub fn format_interleaved(mut self) -> Self {
        self.format = Format::Interleaved;
        self
    }
    pub fn format_segregated(mut self) -> Self {
        self.format = Format::Segregated;
        self
    }
    pub fn set_match_target(mut self, config: TrgtMatcherConfig) -> Self {
        self.match_trgt = Some(config);
        self
    }
    pub fn set_output_segment_size(mut self, size: usize) -> Self {
        self.output_segment_size = size;
        self
    }
    pub fn set_naive_tests(mut self, tests: LargerTrgtNaiveTests) -> Self {
        self.naive_tests = Some(tests);
        self
    }
    pub fn set_lazy_escape_len(mut self, len: usize) -> Self {
        self.lazy_escape_len = Some(len);
        self
    }
    /// Use the short hand compression level.
    /// If match_trgt is true, the same compression level will be used to set the TrgtMatcherConfig.
    /// If secondary compression is Some(_), the format will be Segregated, else Interleaved.
    pub fn comp_level(level: usize,match_trgt:bool,sec_comp:Option<SecondaryCompression>) -> Self {
        let match_trgt = if match_trgt {
            Some(TrgtMatcherConfig::comp_level(level))
        }else{
            None
        };
        let format = if sec_comp.is_some() {
            Format::Segregated
        }else{
            Format::Interleaved
        };
        Self {
            match_src: Some(SrcMatcherConfig::comp_level(level)),
            output_segment_size: MAX_WIN_SIZE,
            format,
            match_trgt,
            sec_comp,
            naive_tests: None,
            lazy_escape_len: None,
        }
    }
}
impl Default for EncoderConfig {
    fn default() -> Self {
        Self {
            match_src: Some(SrcMatcherConfig::comp_level(3)),
            output_segment_size: MAX_WIN_SIZE,
            format: Format::Interleaved,
            match_trgt: None,
            sec_comp: None,
            naive_tests: None,
            lazy_escape_len: None,
        }
    }
}
/// Encodes a delta file based on the given configuration and inputs.
/// # Arguments
/// * `dict` - The source file to use as a dictionary. If None, the source file will not be used.
/// * `output` - The target file to encode.
/// * `writer` - The writer to write the encoded data to.
/// * `config` - The configuration to use for the encoder.
/// # Errors
/// Returns an error if there was an issue reading the source or target files, or writing the encoded data.
pub fn encode<R: std::io::Read+std::io::Seek, W: std::io::Write>(dict: Option<&mut R>, output: &mut R, writer: &mut W,config:&EncoderConfig) -> std::io::Result<()> {
    //this simple encoder will just read all the bytes to memory.
    let mut src_bytes = Vec::new();
    if let Some(r) = dict {
        r.read_to_end(&mut src_bytes)?;
    }
    let mut trgt_bytes = Vec::new();
    output.read_to_end(&mut trgt_bytes)?;
    let src = src_bytes.as_slice();
    let trgt = trgt_bytes.as_slice();
    let EncoderConfig { match_src, match_trgt, sec_comp, format,output_segment_size, naive_tests, lazy_escape_len } = config.clone();
    let segment_size = output_segment_size.min(MAX_WIN_SIZE).max(MAX_INST_SIZE);
    let mut inner_config = GenericEncoderConfig{
        match_trgt,
        match_src,
        lazy_escape_len,
        naive_tests,
    };
    let segments = encoder::encode_inner(&mut inner_config, src, trgt);
    // dbg!(&inner_config);
    let ops = translate_inner_ops(trgt, segments);
    let mut cur_o_pos: usize = 0;
    let mut win_data = Vec::new();
    for (seg_ops,mut header) in make_sections(&ops, segment_size){
        header.format = format;
        debug_assert!({
            let mut o = cur_o_pos;
            seg_ops.iter().all(
                |op| {
                    let len = op.oal() as usize;
                    let test = &trgt[o..o + len];
                    o += len;
                    match op{
                        Op::Add(Add { bytes }) => test == &bytes[..],
                        Op::Copy(Copy { src:CopySrc::Dict, addr, len }) => test == &src[*addr as usize..*addr as usize + *len as usize],
                        Op::Copy(Copy { src:CopySrc::Output, addr, len }) => test == &trgt[*addr as usize..*addr as usize + *len as usize],
                        Op::Run(Run{ byte, .. }) => test.iter().all(|b| b == byte),
                    }
                }
            )
        });
        cur_o_pos += header.output_size as usize;
        section_writer(&sec_comp, header, writer, seg_ops, &mut win_data)?; //write the section
    }
    Ok(())
}


/// This just simplifies mapping a 0..9 comp_level to various ranges for various settings.
struct Ranger {
    input_range: Range<usize>,
    output_range: RangeInclusive<usize>,
    input_span: usize,
    output_span: usize,
    is_inverted: bool,
}

impl Ranger {
    fn new(input_range: Range<usize>, output_range: RangeInclusive<usize>) -> Self {
        let input_span = input_range.end - input_range.start - 1;
        let is_inverted = output_range.start() > output_range.end();
        let output_span = output_range.end().abs_diff(*output_range.start());

        Self { input_range, output_range, input_span, output_span, is_inverted }
    }

    fn map(&self, input_value: usize) -> usize {
        let input_value = input_value.clamp(self.input_range.start, self.input_range.end-1);
        let b = self.output_range.start().min(self.output_range.end());
        let m = input_value - self.input_range.start;
        //let m = if self.is_inverted {self.input_range.end - input_value}else{input_value-self.input_range.start};
        let output = b + ((self.output_span * m) / self.input_span);
        if self.is_inverted{
            self.output_span+self.output_range.end() - output + self.output_range.end()
        }else{
            output
        }
        //Some(output.clamp(*b, b+self.output_span))
    }
}



#[cfg(test)]
mod test_super {
    use super::*;


    #[test]
    fn test_regular_mapping() {
        let input_range = 1..11;
        let output_range = 1..=100;
        let interpolator = Ranger::new(input_range, output_range);

        assert_eq!(interpolator.map(1), 1);
        assert_eq!(interpolator.map(2), 12);
        assert_eq!(interpolator.map(3), 23);
        assert_eq!(interpolator.map(4), 34);
        assert_eq!(interpolator.map(5), 45);
        assert_eq!(interpolator.map(6), 56);
        assert_eq!(interpolator.map(7), 67);
        assert_eq!(interpolator.map(8), 78);
        assert_eq!(interpolator.map(9), 89);
        assert_eq!(interpolator.map(10), 100);
    }

    #[test]
    fn test_inverted_mapping() {
        let input_range = 1..11;
        let output_range = 100..=1; // Inverted range
        let interpolator = Ranger::new(input_range, output_range);

        assert_eq!(interpolator.map(1), 100);
        assert_eq!(interpolator.map(5), 56);
        assert_eq!(interpolator.map(10), 1);
    }

    #[test]
    fn test_out_of_range_input() {
        let input_range = 3..10;
        let output_range = 0..=100;
        let interpolator = Ranger::new(input_range, output_range);

        assert_eq!(interpolator.map(0), interpolator.map(3)); // Below range
        assert_eq!(interpolator.map(11), interpolator.map(10)); // Above range
    }

}