smdiff_decoder/
lib.rs

1use std::io::{Read, Seek, Write};
2
3use smdiff_common::MAX_INST_SIZE;
4
5pub mod zstd{
6    //! Re-exports the zstd streaming decoder used
7    pub use ruzstd::streaming_decoder::StreamingDecoder;
8}
9pub mod brotli{
10    //! Re-exports the brotli decompressor reader used
11    pub use brotlic::DecompressorReader;
12}
13pub mod reader;
14///Applies an SMDiff patch to a source buffer
15/// # Arguments
16/// * `patch` - A Read object that contains the SMDiff patch data
17/// * `src` - An optional mutable reference to a Read+Seek object that contains the source (dictionary) data
18/// * `sink` - A Write object that will receive the patched data
19/// # Errors
20/// Returns an error if there is an issue reading from the patch or source data, or writing to the sink
21///
22/// Note: To enable patch application to large files, we require Read+Seek on the Sink to handle CopySrc::Output operations
23pub fn apply_patch<P:Read+Seek,R:Read+Seek,W:Write+Read+Seek>(patch:&mut P,mut src:Option<&mut R>,sink:&mut W) -> std::io::Result<()> {
24    let mut cur_o_pos: usize = 0;
25    //let mut stats = Stats::default();
26    let mut reader = crate::reader::SectionIterator::new(patch);
27    while let Some(res) = reader.next_borrowed(){
28        let (ops,_header) = res?;
29        apply_ops(ops, &mut src, sink, &mut cur_o_pos)?;
30    }
31    Ok(())
32}
33
34
35fn apply_no_sec_comp<P:Read,R:Read+Seek,W:Write+Read+Seek>(patch:&mut P,mut src:Option<&mut R>,sink:&mut W) -> std::io::Result<()> {
36    //To avoid Seek on write, we must write all the output data to a Vec<u8> first
37    let mut cur_o_pos = 0;
38    let mut reader = smdiff_reader::SectionIterator::new(patch);
39    while let Some(res) = reader.next_borrowed(){
40        let (ops,_header) = res?;
41        apply_ops(ops, &mut src, sink, &mut cur_o_pos)?;
42
43    }
44    Ok(())
45}
46
47/// Applies a series of operations to a buffer
48/// Here `cur_o` represents the output buffer.
49/// We could replace it with W:Write+Read+Seek if we didn't want to allocate the entire output buffer in memory
50/// So... maybe TODO?
51fn apply_ops<R:Read+Seek,W:Write+Read+Seek>(ops:&[smdiff_reader::Op],src:&mut Option<&mut R>,cur_o:&mut W, cur_o_pos: &mut usize) -> std::io::Result<()> {
52    //let mut stats = Stats::default();
53    //let out_size = header.output_size as usize;
54    cur_o.seek(std::io::SeekFrom::Start(*cur_o_pos as u64))?;
55    // cur_o.reserve_exact(out_size);
56    // cur_o.resize(cur_o.len() + out_size, 0);
57    let mut copy_buffer = vec![0u8;MAX_INST_SIZE];
58    for op in ops {
59        match op {
60            smdiff_common::Op::Add(add) => {
61                cur_o.write_all(&add.bytes)?;
62                *cur_o_pos += add.bytes.len();
63                //stats.add();
64            },
65            smdiff_common::Op::Copy(copy) => {
66                match copy.src{
67                    smdiff_common::CopySrc::Dict => {
68                        let src = match src.as_mut(){
69                            Some(s) => s,
70                            None => panic!("Copy operation without source data"),
71                        };
72                        src.seek(std::io::SeekFrom::Start(copy.addr))?;
73                        let len = copy.len as usize;
74                        src.read_exact(&mut copy_buffer[..len])?;
75                        cur_o.write_all(&copy_buffer[..len])?;
76                        *cur_o_pos += len;
77                    },
78                    smdiff_common::CopySrc::Output => {
79                        let start_pos = *cur_o_pos;
80                        cur_o.seek(std::io::SeekFrom::Start(copy.addr as u64))?;
81                        let len = copy.len as usize;
82                        cur_o.read_exact(&mut copy_buffer[..len])?;
83                        cur_o.seek(std::io::SeekFrom::Start(start_pos as u64))?;
84                        cur_o.write_all(&copy_buffer[..len])?;
85                        *cur_o_pos += len;
86                    },
87                }
88                //stats.copy();
89            },
90            smdiff_common::Op::Run(run) => {
91                //stats.run();
92                let len = run.len as usize;
93                copy_buffer[..len].fill(run.byte);
94                cur_o.write_all(&copy_buffer[..len])?;
95                *cur_o_pos += len;
96            },
97        }
98    }
99
100    Ok(())
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Applies `patch_bytes` with "hello" as the source (dictionary) data and
    /// returns everything that ended up in the sink.
    fn patch_hello(patch_bytes: Vec<u8>) -> Vec<u8> {
        let mut dict = Cursor::new("hello".as_bytes().to_vec());
        let mut patch = Cursor::new(patch_bytes);
        let mut sink = Cursor::new(Vec::new());
        apply_patch(&mut patch, Some(&mut dict), &mut sink).unwrap();
        sink.into_inner()
    }

    /// Single section: "hello" -> "Hello! Hello!"
    #[test]
    fn test_src_apply() {
        // Patch bytes taken from the encoder tests.
        let out = patch_hello(vec![
            0,   // 0b0_0_000_000
            4,   // num_ops uvarint
            13,  // output size uvarint
            129, // ADD, Size 1 0b10_000001
            72,  // 'H'
            4,   // COPY_D, Size 4 0b00_000100
            2,   // addr ivar int +1
            130, // ADD, Size 2 0b10_000010
            33,  // '!'
            32,  // ' '
            70,  // COPY_O, Size 6 0b01_000110
            0,   // addr ivar int 0
        ]);
        assert_eq!(out, "Hello! Hello!".as_bytes());
    }

    /// Four sections, one op each: "hello" -> "Hello! Hello!"
    #[test]
    fn test_complex_apply() {
        // Patch bytes taken from the encoder tests.
        let out = patch_hello(vec![
            // Section 1
            192, // 0b1_1_000_000
            1,   // Num ops uvarint
            1,   // Num add bytes uvarint
            0,   // Output size uvarint diff encoded from add uvarint
            129, // ADD, Size 1 0b10_000001
            72,  // 'H'
            // Section 2
            192, // 0b1_1_000_000
            1,   // Num ops uvarint
            0,   // Num add bytes uvarint
            4,   // Output size uvarint diff encoded from add uvarint
            4,   // COPY_D, Size 4 0b00_000100
            2,   // addr ivar int +1
            // Section 3
            192, // 0b1_1_000_000
            1,   // Num ops uvarint
            2,   // Num add bytes uvarint
            0,   // Output size uvarint diff encoded from add uvarint
            130, // ADD, Size 2 0b10_000010
            33,  // '!'
            32,  // ' '
            // Section 4
            64,  // 0b0_1_000_000
            1,   // Num ops uvarint
            0,   // Num add bytes uvarint
            6,   // Output size uvarint diff encoded from add uvarint
            70,  // COPY_O, Size 6 0b01_000110
            0,   // addr ivar int 0
        ]);
        assert_eq!(out, "Hello! Hello!".as_bytes());
    }

    /// Six sections covering add, both copy kinds and run:
    /// "hello" -> "Hello! Hello! Hell..."
    #[test]
    fn test_kitchen_sink() {
        // Patch bytes taken from the encoder tests.
        let out = patch_hello(vec![
            // Section 1
            192, // 0b1_1_000_000
            1,   // Num ops uvarint
            1,   // Num add bytes uvarint
            0,   // Output size uvarint diff encoded from add uvarint
            129, // ADD, Size 1 0b10_000001
            72,  // 'H'
            // Section 2
            192, // 0b1_1_000_000
            1,   // Num ops uvarint
            0,   // Num add bytes uvarint
            4,   // Output size uvarint diff encoded from add uvarint
            4,   // COPY_D, Size 4 0b00_000100
            2,   // addr ivar int +1
            // Section 3
            192, // 0b1_1_000_000
            1,   // Num ops uvarint
            2,   // Num add bytes uvarint
            0,   // Output size uvarint diff encoded from add uvarint
            130, // ADD, Size 2 0b10_000010
            33,  // '!'
            32,  // ' '
            // Section 4
            192, // 0b1_1_000_000
            1,   // Num ops uvarint
            0,   // Num add bytes uvarint
            6,   // Output size uvarint diff encoded from add uvarint
            70,  // COPY_O, Size 6 0b01_000110
            0,   // addr ivar int 0
            // Section 5
            192, // 0b1_1_000_000
            1,   // Num ops uvarint
            0,   // Num add bytes uvarint
            5,   // Output size uvarint diff encoded from add uvarint
            69,  // COPY_O, Size 5 0b01_000101
            12,  // addr ivar int +6
            // Section 6
            64,  // 0b0_1_000_000
            1,   // Num ops uvarint
            0,   // Num add bytes uvarint
            3,   // Output size uvarint diff encoded from add uvarint
            195, // Run, Size 3 0b11_000011
            46,  // '.'
        ]);
        assert_eq!(out, "Hello! Hello! Hell...".as_bytes());
    }
}