smdiff_common/
lib.rs

1
2/// Bits for the operation type
3pub const OP_MASK: u8 = 0b11000000;
4/// Copy from dictionary bit flag value
5pub const COPY_D: u8 = 0b00000000;
6/// Copy from output bit flag value
7pub const COPY_O: u8 = 0b01000000;
8/// Add bit flag value
9pub const ADD: u8 = 0b10000000;
10/// Run bit flag value
11pub const RUN: u8 = 0b11000000;
12/// Bits for the Size Value
13pub const SIZE_MASK: u8 = 0b00111111;
14/// Inclusive Upper Bound
15pub const MAX_RUN_LEN:u8 = 62;
16/// Inclusive Upper Bound
17pub const MAX_INST_SIZE:usize = u16::MAX as usize;
18/// Inclusive Upper Bound
19pub const MAX_WIN_SIZE:usize = (1<<24) - 1; // 16MB
20
21/// Section Header Continue Bit
22pub const SECTION_CONTINUE_BIT: u8 = 0b10000000;
23/// Section Header Format Bit
24pub const SECTION_FORMAT_BIT: u8 = 0b01000000;
25/// Section Header Compression Mask
26pub const SECTION_COMPRESSION_MASK: u8 = 0b00111000;
27/// Section Header Compression Right Shift amount
28pub const SECTION_COMPRESSION_RSHIFT: u8 = 3;
29/// Section Header Version Mask
30pub const VERSION_MASK: u8 = 0b00000111;
31
32/// Format of the how a section is laid out.
33/// * Interleaved: The Add bytes follow each Add Opertion
34/// * Segregated: All Add bytes are at the end of the section
35/// default: Interleaved
36#[derive(Copy,Clone,Debug, PartialEq, Eq)]
37pub enum Format {
38    /// The Add bytes follow each Add Opertion
39    Interleaved,
40    /// All Add bytes are at the end of the section
41    Segregated,
42}
43impl Default for Format {
44    fn default() -> Self {
45        Format::Interleaved
46    }
47}
48
49impl Format {
50    pub fn is_interleaved(&self) -> bool {
51        matches!(self, Format::Interleaved{..})
52    }
53    pub fn is_segregated(&self) -> bool {
54        matches!(self, Format::Segregated)
55    }
56}
57/// Header struct for a section
58#[derive(Copy,Clone,Debug,Default, PartialEq)]
59pub struct SectionHeader {
60    /// Should be a value between 0-7 per the spec
61    pub compression_algo: u8,
62    pub format: Format,
63    /// true if there are more sections to decode after this one.
64    /// false if this is the last section
65    /// default: false
66    pub more_sections: bool,
67    /// Total number of operations in the section
68    pub num_operations: u32,
69    /// Total Add bytes at the end of the section (not needed for interleaved format)
70    pub num_add_bytes: u32,
71    /// Total output size generated from the operations in this section
72    /// Maximum value should not exceed (1<<24) - 1
73    pub output_size: u32,
74}
75
76impl SectionHeader {
77    /// Create a new SectionHeader with the given parameters
78    /// * num_operations: Total number of operations in the section
79    /// * num_add_bytes: Total Add bytes at the end of the section (not needed for interleaved format)
80    /// * output_size: Total output size generated from the operations in this section
81    pub fn new(num_operations: u32, num_add_bytes: u32, output_size: u32) -> Self {
82        Self { num_operations, num_add_bytes, output_size, ..Default::default() }
83    }
84    /// Set the compression algorithm to use for this section
85    /// * compression_algo: Should be a value between 0-7 per the spec
86    pub fn set_compression_algo(mut self, compression_algo: u8) -> Self {
87        self.compression_algo = compression_algo.clamp(0, 7);
88        self
89    }
90    pub fn set_format(mut self, format: Format) -> Self {
91        self.format = format;
92        self
93    }
94    /// Indicate if this section is *not* the last section
95    pub fn set_more_sections(mut self, more_sections: bool) -> Self {
96        self.more_sections = more_sections;
97        self
98    }
99
100    pub fn is_compressed(&self) -> bool {
101        self.compression_algo != 0
102    }
103    pub fn is_interleaved(&self) -> bool {
104        self.format.is_interleaved()
105    }
106}
107
108/// Trait for the Add Operation
109/// Depending on the usage, we may want to store the Add bytes in different ways
110/// This trait allows for different implementations of the Add Operation within an Op
111pub trait AddOp{
112    /// Get the bytes for the Add Operation
113    fn bytes(&self) -> &[u8];
114}
115
116/// Run Operation
117/// * byte: The byte to repeat
118/// * len: The number of times to repeat the byte (1-62)
119#[derive(Clone, Debug, PartialEq, Eq)]
120pub struct Run{
121    pub byte: u8,
122    pub len: u8,
123}
124/// Copy Operation
125/// * src: The source of the copy (Dict or Output)
126/// * addr: The absolute start position in the src to start copying from
127/// * len: The number of bytes to copy.
128#[derive(Copy, Clone, Debug, PartialEq, Eq)]
129pub struct Copy{
130    pub src: CopySrc,
131    pub addr: u64,
132    pub len: u16,
133}
134
135/// Where the Copy Operation should copy from.
136/// * Dict: Copy from the dictionary (source file)
137/// * Output: Copy from the output buffer (output file)
138#[derive(Copy, Clone, Debug, PartialEq, Eq)]
139pub enum CopySrc{
140    Dict,
141    Output,
142}
143/// Enum for the different types of operations
144/// * Run: Repeat a byte a number of times
145/// * Copy: Copy bytes from a source to the output
146/// * Add: Add bytes to the output
147#[derive(Clone, Debug, PartialEq, Eq)]
148pub enum Op<A>{
149    Run(Run),
150    Copy(Copy),
151    Add(A),
152}
153impl<A> Op<A> {
154    /// Get the bit flag for the operation type.
155    /// This is per the spec.
156    pub fn bit_flag(&self) -> u8 {
157        match self {
158            Op::Run(_) => RUN,
159            Op::Copy(copy) => match copy.src {
160                CopySrc::Dict => COPY_D,
161                CopySrc::Output => COPY_O,
162            },
163            Op::Add(_) => ADD,
164        }
165    }
166    pub fn is_add(&self) -> bool {
167        matches!(self, Op::Add(_))
168    }
169    pub fn is_run(&self) -> bool {
170        matches!(self, Op::Run(_))
171    }
172    pub fn is_copy(&self) -> bool {
173        matches!(self, Op::Copy(_))
174    }
175    pub fn take_copy(&self) -> Option<Copy> {
176        if let Op::Copy(copy) = self {
177            Some(*copy)
178        } else {
179            None
180        }
181    }
182}
183impl<A:AddOp> Op<A> {
184    /// Get the total number of bytes this operation will generate in the output stream.
185    pub fn oal(&self) -> u16 {
186        match &self {
187            Op::Add(add) => add.bytes().len() as u16,
188            Op::Copy(copy) => copy.len,
189            Op::Run(run) => run.len as u16,
190        }
191    }
192}
193
194
195/// Used to determine how the size should be handled.
196///
197/// This is mostly to aid in control flow for encoding/decoding the size of an operation.
198#[derive(Copy, Clone, Debug, PartialEq, Eq)]
199pub enum Size{
200    Done(u8),
201    U8And62,
202    U16
203}
204
205impl Size {
206    /// How many bytes will the Size Indicator be in the patch file.
207    /// * 0 represents no Size Indicator is present.
208    pub fn size_overhead(&self) -> usize {
209        match self {
210            Size::Done(_) => 0,
211            Size::U8And62 => 1,
212            Size::U16 => 2,
213        }
214    }
215}
216/// Used to determine how an operation of `size` (oal) should be encoded.
217#[inline]
218pub fn size_routine(size: u16)->Size{
219    match size {
220        1..=62 => Size::Done(size as u8),
221        63 => Size::U8And62,
222        _ => Size::U16,
223    }
224}
225
226/// Convert an i64 to a u64 using ZigZag encoding.
227#[inline]
228pub fn zigzag_encode(n: i64) -> u64 {
229    ((n << 1) ^ (n >> 63)) as u64
230}
231
232/// Convert a u64 to an i64 using ZigZag decoding.
233#[inline]
234pub fn zigzag_decode(z: u64) -> i64 {
235    ((z >> 1) as i64) ^ -((z & 1) as i64)
236}
237
238/// Write a u64 value to the writer using u-varint encoding.
239pub fn write_u_varint<W: std::io::Write>(writer: &mut W, mut n: u64) -> std::io::Result<()> {
240    let mut buf = [0u8; 10]; // Max length for u-varint encoding of u64
241    let mut i = 0;
242    while n >= 0x80 {
243        buf[i] = ((n & 0x7F) | 0x80) as u8;
244        n >>= 7;
245        i += 1;
246    }
247    buf[i] = n as u8;
248    writer.write_all(&buf[..=i])
249}
250
251/// Read a u64 value from the reader at its current position using u-varint decoding.
252pub fn read_u_varint<R: std::io::Read>(reader: &mut R) -> std::io::Result<u64> {
253    let mut result = 0u64;
254    let mut shift = 0;
255    let mut b = [0u8; 1];
256    loop {
257        reader.read_exact(&mut b)?;
258        let byte = b[0];
259        result |= ((byte & 0x7F) as u64) << shift;
260        if byte & 0x80 == 0 {
261            break;
262        }
263        shift += 7;
264    }
265    Ok(result)
266}
267
268/// Write an i64 value to the writer using i-varint encoding.
269pub fn write_i_varint<W: std::io::Write>(writer: &mut W, n: i64) -> std::io::Result<()> {
270    write_u_varint(writer, zigzag_encode(n as i64))
271}
272
273/// Read an i64 value from the reader at its current position using i-varint decoding.
274pub fn read_i_varint<R: std::io::Read>(reader: &mut R) -> std::io::Result<i64> {
275    Ok(zigzag_decode(read_u_varint(reader)?))
276}
277
278/// Read a u8 value from the reader at its current position.
279pub fn read_u8<R: std::io::Read>(reader: &mut R) -> std::io::Result<u8> {
280    let mut buf = [0u8; 1];
281    reader.read_exact(&mut buf)?;
282    Ok(buf[0])
283}
284
285/// Write a u8 value to the writer.
286pub fn write_u8<W: std::io::Write>(writer: &mut W, n: u8) -> std::io::Result<()> {
287    writer.write_all(&[n])
288}
289
290/// Read a u16(little-endian) value from the reader at its current position.
291pub fn read_u16<R: std::io::Read>(reader: &mut R) -> std::io::Result<u16> {
292    let mut buf = [0u8; 2];
293    reader.read_exact(&mut buf)?;
294    Ok(u16::from_le_bytes(buf))
295}
296
297/// Write a u16(little-endian) value to the writer.
298pub fn write_u16<W: std::io::Write>(writer: &mut W, n: u16) -> std::io::Result<()> {
299    writer.write_all(&n.to_le_bytes())
300}
301
302/// Helper fn to determine how many bytes a given u64 will take when encoded using u-varint.
303#[inline]
304pub fn u_varint_encode_size(n: u64) -> usize {
305    let mut size = 1;
306    let mut n = n >> 7;
307    while n > 0 {
308        size += 1;
309        n >>= 7;
310    }
311    size
312}
313
314/// Helper fn to 'decode' a copy address from an i64 to the actual absolute address.
315#[inline]
316pub fn diff_addresses_to_u64(cur: u64, input: i64) -> u64 {
317    if input >= 0 {
318        // Safe to convert i64 to u64 because i is non-negative
319        cur.wrapping_add(input as u64)
320    } else {
321        // i is negative, so convert the absolute value to u64 and subtract
322        let positive_i = input.abs() as u64; // This is safe because `abs()` of a negative i64 can always fit in u64
323        cur.wrapping_sub(positive_i)
324    }
325}
326
327/// Helper fn to 'encode' a copy address from an absolute position (u64) to the relative difference from the last used absolute address.
328#[inline]
329pub fn diff_addresses_to_i64(cur: u64, target: u64) -> i64 {
330    if target > cur {
331        (target - cur) as i64
332    } else {
333        (cur - target) as i64 * -1
334    }
335}
336
337#[cfg(test)]
338mod tests {
339    use super::*;
340    use std::io::Cursor;
341
342    #[test]
343    fn test_ivarint() {
344        let mut buffer = Vec::new();
345        let values = [0, -1, 1, -2, 2, 1024, -1025, i64::MAX, i64::MIN];
346        for &val in &values {
347            buffer.clear();
348            write_i_varint(&mut buffer, val).unwrap();
349            let mut cursor = Cursor::new(&buffer);
350            let decoded = read_i_varint(&mut cursor).unwrap();
351            assert_eq!(val, decoded, "Failed encoding/decoding {}", val);
352        }
353    }
354    #[test]
355    fn test_ivarint_vals() {
356        let mut buffer = Vec::new();
357        let values = [
358            -64,63, // min 3 byte match
359            -8192,8191, //min 4 byte match
360            -1048576,1048575, //min 5 byte match
361            -134217728,134217727, //min 6 byte match
362            -123456789
363        ];
364        for &val in &values {
365            buffer.clear();
366            let zigged = zigzag_encode(val);
367            write_i_varint(&mut buffer, val).unwrap();
368            println!("i64: {} buffer:{:?} zigzagged: {}",val,buffer,zigged);
369            let mut cursor = Cursor::new(&buffer);
370            let decoded = read_i_varint(&mut cursor).unwrap();
371            assert_eq!(val, decoded, "Failed encoding/decoding {}", val);
372        }
373    }
374    #[test]
375    fn test_add_i64_to_u64() {
376        // Positive i64 addition
377        assert_eq!(diff_addresses_to_u64(10, 5), 15);
378        assert_eq!(diff_addresses_to_u64(0, i64::MAX), i64::MAX as u64);
379
380        // Negative i64 addition
381        assert_eq!(diff_addresses_to_u64(20, -5), 15);
382        assert_eq!(diff_addresses_to_u64(10, -15), 0xFFFFFFFFFFFFFFFB); // Check wrapping behavior
383
384        // Edge cases
385        assert_eq!(diff_addresses_to_u64(0, -1), 0xFFFFFFFFFFFFFFFF);
386        assert_eq!(diff_addresses_to_u64(u64::MAX, 1), 0); // Check overflow
387    }
388
389    #[test]
390    fn test_u64_to_i64_diff() {
391        // Positive difference
392        assert_eq!(diff_addresses_to_i64(5, 10), 5);
393
394        // Negative difference
395        assert_eq!(diff_addresses_to_i64(10, 5), -5);
396
397        // Zero difference
398        assert_eq!(diff_addresses_to_i64(20, 20), 0);
399    }
400}