smdiff_vcdiff/
lib.rs

1use std::io::Seek;
2
3use smdiff_common::{AddOp, Copy, CopySrc, Format, Run, SectionHeader, MAX_RUN_LEN, MAX_WIN_SIZE};
4use smdiff_writer::{write_section_header, write_ops};
5use vcdiff_common::{CopyType, Inst, Instruction, WinIndicator, ADD, RUN};
6use vcdiff_reader::{VCDReader, VCDiffReadMsg};
7
/// Upper bound on the length of a single emitted `Add` or `Copy` operation.
/// Longer VCDIFF instructions are split into chunks of at most this many bytes.
const MAX_INST_SIZE: u32 = u16::MAX as u32;
/// The smdiff operation type, specialised to this crate's owned [`Add`] payload.
pub type Op = smdiff_common::Op<Add>;
10
/// Payload of an ADD operation that owns its literal bytes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Add{
    /// The literal bytes this operation carries.
    pub bytes: Vec<u8>,
}
15
16impl AddOp for Add{
17    fn bytes(&self) -> &[u8] {
18        &self.bytes
19    }
20}
21
22pub fn convert_vcdiff_to_smdiff<R: std::io::Read+Seek, W: std::io::Write>(reader: R, mut writer: W) -> std::io::Result<()> {
23    let mut cur_win = Vec::new();
24    let mut reader = VCDReader::new(reader)?;
25    let mut ssp = None;
26    let mut vcd_trgt = false;
27    let mut cur_o_pos = 0;
28    let mut cur_win_size = 0;
29    let mut num_add_bytes = 0;
30    loop{
31        match reader.next()?{
32            VCDiffReadMsg::WindowSummary(ws) => {
33                ssp = ws.source_segment_position;
34                if ws.win_indicator == WinIndicator::VCD_TARGET{
35                    vcd_trgt = true;
36                }
37            },
38            VCDiffReadMsg::Inst { first, second } => {
39                for inst in [Some(first), second]{
40                    if inst.is_none(){
41                        continue;
42                    }
43                    let inst = inst.unwrap();
44                    match inst{
45                        Inst::Add(ADD{ len, p_pos }) => {
46                            let r = reader.get_reader(p_pos)?;
47                            let total: u32 = len;
48                            let mut processed: u32 = 0;
49                            while processed < total {
50                                let remaining = total - processed;
51                                let chunk_size = remaining.min(MAX_INST_SIZE);
52                                if cur_win_size + chunk_size > MAX_WIN_SIZE as u32{
53                                    let header = SectionHeader {
54                                        num_operations:cur_win.len() as u32,
55                                        num_add_bytes,
56                                        output_size:cur_win_size,
57                                        compression_algo: 0,
58                                        format: Format::Interleaved,
59                                        more_sections: true,
60
61                                    };
62                                    write_section_header(&header,&mut writer)?;
63                                    write_ops(&cur_win, &header,&mut writer)?;
64                                    cur_win.clear();
65                                    cur_win_size = 0;
66                                    num_add_bytes = 0;
67                                }
68                                let mut bytes = vec![0; chunk_size as usize];
69                                r.read_exact(&mut bytes)?;
70                                let op = Op::Add(Add{bytes});
71                                cur_win.push(op);
72                                processed += chunk_size;
73                                num_add_bytes += chunk_size;
74                                cur_win_size += chunk_size;
75                            }
76                            cur_o_pos += len as u64;
77                        },
78                        Inst::Run(RUN{ len, byte }) => {
79                            println!("Run @{}: of: {} let: {}",cur_o_pos,byte,len);
80                            let total: u32 = len;
81                            let mut processed: u32 = 0;
82                            while processed < total {
83                                let remaining = total - processed;
84                                let chunk_size = remaining.min(MAX_RUN_LEN as u32);
85                                if cur_win_size + chunk_size > MAX_WIN_SIZE as u32{
86                                    let header = SectionHeader {
87                                        num_operations:cur_win.len() as u32,
88                                        num_add_bytes,
89                                        output_size:cur_win_size,
90                                        compression_algo: 0,
91                                        format: Format::Interleaved,
92                                        more_sections: true,
93
94                                    };
95                                    write_section_header(&header,&mut writer)?;
96                                    write_ops(&cur_win, &header,&mut writer)?;
97                                    cur_win.clear();
98                                    cur_win_size = 0;
99                                    num_add_bytes = 0;
100                                }
101                                assert!(chunk_size <= MAX_RUN_LEN as u32);
102                                let op = Op::Run(Run{byte,len:chunk_size as u8});
103                                cur_win.push(op);
104                                cur_win_size += chunk_size;
105                                processed += chunk_size;
106                            }
107                            cur_o_pos += len as u64;
108                        },
109                        Inst::Copy(copy) =>{
110                            let (mut addr,src,seq) = match copy.copy_type{
111                                CopyType::CopyS => {
112                                    let ssp = ssp.expect("SSP not set");
113                                    let addr = ssp+copy.u_pos as u64;
114                                    let src = if vcd_trgt {CopySrc::Output}else{CopySrc::Dict};
115                                    (addr,src,None)
116                                },
117                                CopyType::CopyT { inst_u_pos_start } => {
118                                    let offset = inst_u_pos_start - copy.u_pos;
119                                    let addr = cur_o_pos - offset as u64;
120                                    let src = CopySrc::Output;
121                                    (addr,src,None)
122                                },
123                                CopyType::CopyQ { len_o } => {
124                                    let slice_len = copy.len_in_u() - len_o;
125                                    let addr = cur_o_pos - slice_len as u64;
126                                    let src = CopySrc::Output;
127                                    (addr,src,Some((slice_len,len_o)))
128                                },
129                            };
130                            if let Some((slice_len,seq_len)) = seq {
131                                let total: u32 = seq_len;
132                                let mut processed: u32 = 0;
133                                while processed < total {
134                                    let remaining = total - processed;
135                                    let chunk_size = remaining.min(slice_len);
136                                    //dbg!(slice_len,chunk_size);
137                                    if cur_win_size + chunk_size > MAX_WIN_SIZE as u32{
138                                        let header = SectionHeader {
139                                            num_operations:cur_win.len() as u32,
140                                            num_add_bytes,
141                                            output_size:cur_win_size,
142                                            compression_algo: 0,
143                                            format: Format::Interleaved,
144                                            more_sections: true,
145
146                                        };
147                                        write_section_header(&header,&mut writer)?;
148                                        write_ops(&cur_win, &header,&mut writer)?;
149                                        cur_win.clear();
150                                        cur_win_size = 0;
151                                        num_add_bytes = 0;
152                                    }
153                                    let op = Op::Copy(Copy{ src, addr, len: chunk_size as u16 });
154                                    cur_win.push(op);
155                                    processed += chunk_size;
156                                    cur_win_size += chunk_size;
157
158                                };
159                                cur_o_pos += seq_len as u64;
160                            }else{
161                                let total: u32 = copy.len_in_o();
162                                let mut processed: u32 = 0;
163                                while processed < total {
164                                    let remaining = total - processed;
165                                    let chunk_size = MAX_INST_SIZE.min(remaining);
166                                    if cur_win_size + chunk_size > MAX_WIN_SIZE as u32{
167                                        let header = SectionHeader {
168                                            num_operations:cur_win.len() as u32,
169                                            num_add_bytes,
170                                            output_size:cur_win_size,
171                                            compression_algo: 0,
172                                            format: Format::Interleaved,
173                                            more_sections: true,
174
175                                        };
176                                        write_section_header(&header,&mut writer)?;
177                                        write_ops(&cur_win, &header,&mut writer)?;
178                                        cur_win.clear();
179                                        cur_win_size = 0;
180                                        num_add_bytes = 0;
181                                    }
182                                    let op = Op::Copy(Copy{ src, addr, len: chunk_size as u16 });
183                                    cur_win.push(op);
184                                    addr += chunk_size as u64;
185                                    processed += chunk_size;
186                                    cur_win_size += chunk_size;
187
188                                };
189                                cur_o_pos += copy.len_in_u() as u64;
190                            }
191                        }
192                    }
193                }
194            },
195            VCDiffReadMsg::EndOfWindow => {
196                ssp = None;
197                vcd_trgt = false;
198            },
199            VCDiffReadMsg::EndOfFile => break,
200        }
201    }
202    //dbg!(cur_o_pos);
203    //now we determine what we need to write
204    let header = SectionHeader {
205        num_operations:cur_win.len() as u32,
206        num_add_bytes,
207        output_size:cur_win_size,
208        compression_algo: 0,
209        format: Format::Interleaved,
210        more_sections: false,
211
212    };
213    write_section_header(&header,&mut writer)?;
214    write_ops(&cur_win, &header,&mut writer)?;
215    Ok(())
216}
217
// Byte-level fixtures: each test feeds a hand-written VCDIFF stream through
// `convert_vcdiff_to_smdiff` and compares the result with the expected smdiff bytes.
#[cfg(test)]
mod test_super {
    use std::io::Cursor;

    use super::*;

    #[test]
    fn test_hello_micro() {
        // 'hello' -> 'Hello! Hello!'
        let mut vcd_bytes = Cursor::new(vec![
            214,195,196,0, //magic
            0, //hdr_indicator
            1, //win_indicator VCD_SOURCE
            4, //SSS
            1, //SSP
            12, //delta window size
            13, //target window size
            0, //delta indicator
            3, //length of data for ADDs and RUNs
            2, //length of instructions and sizes
            2, //length of addresses for COPYs
            72,33,32, //'H! ' data section
            163, //ADD1 COPY4_mode6
            183, //ADD2 COPY6_mode0
            0,
            4,
        ]);
        let smd_bytes = vec![
            4, // 0b00_0_00100
            129, //ADD, Size 1 0b10_000001
            72, //'H'
            4, //COPY_D, Size 4 0b00_000100
            2, //addr ivar int +1
            130, //ADD, Size 2 0b10_000010
            33, //'!'
            32, //' '
            70, //COPY_O, Size 6 0b01_000110
            0, //addr ivar int 0
        ];
        let mut out = Vec::new();
        convert_vcdiff_to_smdiff(&mut vcd_bytes, &mut out).unwrap();
        assert_eq!(out, smd_bytes);
    }

    #[test]
    fn test_seq(){
        // Instructions -> "" -> "tererest"
        let mut vcd_bytes = Cursor::new(vec![
            214, 195, 196, 0,  //magic
            0,  //hdr_indicator
            0, //win_indicator
            13, //size_of delta window
            8, //size of target window
            0, //delta indicator
            5, //length of data for ADDs and RUNs
            2, //length of instructions and sizes
            1, //length of addresses for COPYs
            116, 101, 114, 115, 116, //data section b"terst" 12..17
            200, //ADD size3 & COPY5_mode0
            3, //ADD size 2
            1, //addr for copy
        ]);
        let smd_bytes = vec![ //should be Add(ter), Copy(1,2), Copy(1,1),Add(st)
            4, // 0b00_0_00100
            131, //ADD, Size 3 0b10_000011
            116, //'t'
            101, //'e'
            114, //'r'
            66, //COPY_O, Size 2 0b01_000010
            2, //addr ivar int +1
            65, //COPY_O, Size 1 0b01_000001
            0, //addr ivar int 0
            130, //ADD, Size 2 0b10_000010
            115, //'s'
            116, //'t'
        ];

        let mut out = Vec::new();
        convert_vcdiff_to_smdiff(&mut vcd_bytes, &mut out).unwrap();
        assert_eq!(out, smd_bytes);

    }

    #[test]
    fn test_run(){
        // Instructions -> "" -> "r" x 128 long
        let mut vcd_bytes = Cursor::new(vec![
            214, 195, 196, 0,  //magic
            0,  //hdr_indicator
            0, //win_indicator
            9, //size_of delta window
            128, //size of target window
            0, //delta indicator
            1, //length of data for ADDs and RUNs
            1, //length of instructions and sizes
            1, //length of addresses for COPYs
            114, //data section b"r"
            0, //RUN
            129,0, //len 128
        ]);
        let smd_bytes = vec![ //should be Run(r,62), Run(r,62), Run(r,4)
            3, // 0b00_0_00011
            254, //RUN, Size 62 0b11_111110
            114, //'r'
            254, //RUN, Size 62 0b11_111110
            114, //'r'
            196, //RUN, Size 4 0b11_000100
            114, //'r'
        ];

        let mut out = Vec::new();
        convert_vcdiff_to_smdiff(&mut vcd_bytes, &mut out).unwrap();
        assert_eq!(out, smd_bytes);

    }
}
333}