1use std::io::Read;
11
12use smdiff_common::{diff_addresses_to_u64, read_i_varint, read_u16, read_u8, read_u_varint, size_routine, AddOp, Copy, CopySrc, Format, Run, SectionHeader, Size, ADD, COPY_D, COPY_O, OP_MASK, RUN, SECTION_COMPRESSION_MASK, SECTION_COMPRESSION_RSHIFT, SECTION_CONTINUE_BIT, SECTION_FORMAT_BIT, SIZE_MASK};
13
14
/// Operation type used by this reader: `smdiff_common::Op` specialised with
/// this module's [`Add`] as the add-operation payload.
pub type Op = smdiff_common::Op<Add>;
17
/// Add operation whose payload is held as an owned byte vector.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Add {
    /// The bytes carried by this operation.
    pub bytes: Vec<u8>,
}

impl Add {
    /// Builds an [`Add`] that takes ownership of `bytes`.
    pub fn new(bytes: Vec<u8>) -> Self {
        Self { bytes }
    }
}
28
29impl AddOp for Add{
30 fn bytes(&self) -> &[u8] {
31 &self.bytes
32 }
33}
34pub fn read_section_header<R: std::io::Read>(reader: &mut R) -> std::io::Result<SectionHeader> {
36 let header_byte = read_u8(reader)?;
37 let compression_algo = (header_byte & SECTION_COMPRESSION_MASK) >> SECTION_COMPRESSION_RSHIFT;
38 let format = if header_byte & SECTION_FORMAT_BIT == SECTION_FORMAT_BIT{Format::Segregated} else {Format::Interleaved};
39 let more_sections = (header_byte & SECTION_CONTINUE_BIT) == SECTION_CONTINUE_BIT;
40 let num_operations = read_u_varint(reader)? as u32;
41 let num_add_bytes = if format.is_segregated() {
42 read_u_varint(reader)? as u32
43 } else {
44 0
45 };
46 let read_size = read_u_varint(reader)? as u32;
47 let output_size = if format.is_segregated(){
48 num_add_bytes + read_size
49 }else{
50 read_size
51 };
52
53 Ok(SectionHeader {
54 compression_algo,
55 format,
56 more_sections,
57 num_operations,
58 num_add_bytes,
59 output_size,
60 })
61}
62
63pub fn read_ops_no_comp<R: std::io::Read>(reader: &mut R, header:&mut SectionHeader,op_buffer:&mut Vec<Op>)-> std::io::Result<()>{
69 let SectionHeader { format, num_operations, output_size, .. } = header;
70 let mut cur_d_addr = 0;
72 let mut cur_o_addr = 0;
73 op_buffer.reserve(*num_operations as usize);
74 match format {
75 Format::Segregated => {
76 let buffer_offset = op_buffer.len();
77 let mut add_idxs = Vec::new();
78 let mut check_size = 0;
79 for i in 0..*num_operations {
81 let op = read_op(reader, &mut cur_d_addr, &mut cur_o_addr,false)?;
82 let len = op.oal() as u32;
83 check_size += len;
84 if op.is_add(){
85 header.num_add_bytes += len;
86 add_idxs.push(buffer_offset+i as usize);
87 }
88 op_buffer.push(op);
89 }
90 if &check_size != output_size{
91 return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, format!("Window Header output size: {} != Sum(ops.oal()) {}",output_size,check_size)));
92 }
93 for i in add_idxs{
96 let op = op_buffer.get_mut(i).unwrap();
97 if let Op::Add(add) = op{
98 reader.read_exact(&mut add.bytes)?;
99 }
100 }
101 Ok(())
102 },
103 Format::Interleaved => {
104 let mut check_size = 0;
105 for _ in 0..*num_operations {
106 let op = read_op(reader, &mut cur_d_addr, &mut cur_o_addr,true)?;
107 check_size += op.oal() as u32;
108 op_buffer.push(op);
109 }
110 if &check_size != output_size{
111 return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, format!("Window Header output size: {} != Sum(ops.oal()) {}",output_size,check_size)));
112 }
113 Ok(())
114 }
115 }
116}
117
118pub fn read_section<R: std::io::Read>(reader: &mut R, op_buffer:&mut Vec<Op>) -> std::io::Result<SectionHeader> {
122 let mut header = read_section_header(reader)?;
123 op_buffer.reserve(header.num_operations as usize);
124 read_ops_no_comp(reader, &mut header, op_buffer)?;
125 Ok(header)
126}
127
128#[derive(Copy, Clone, Debug, PartialEq, Eq)]
129enum OpType{
130 Copy{src:CopySrc},
131 Add,
132 Run
133}
134
135struct OpByte{
136 op:OpType,
137 size:Size
138}
139fn read_op_byte<R: std::io::Read>(reader: &mut R) -> std::io::Result<OpByte> {
140 let byte = read_u8(reader)?;
141 let size_indicator = byte & SIZE_MASK;
142 let op_type = byte & OP_MASK;
143
144 let size = size_routine(size_indicator as u16);
145 match op_type {
146 COPY_D => Ok(OpByte{op:OpType::Copy { src: CopySrc::Dict },size}),
147 COPY_O => Ok(OpByte{op:OpType::Copy { src: CopySrc::Output },size}),
148 ADD => Ok(OpByte{op:OpType::Add,size}),
149 RUN => Ok(OpByte{op:OpType::Run,size}),
150 _ => Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Invalid op type")),
151 }
152}
153pub fn read_op<R: std::io::Read>(reader: &mut R,cur_d_addr:&mut u64,cur_o_addr:&mut u64,is_interleaved:bool) -> std::io::Result<Op> {
162 let OpByte { op, size } = read_op_byte(reader)?;
163 if matches!(op, OpType::Run) && !matches!(size, Size::Done(_)) {
164 return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Invalid size for RUN operation"));
165 }
166 let size = match size {
167 Size::Done(size) => size as u16,
168 Size::U8And62 => read_u8(reader)? as u16 + 62,
169 Size::U16 => read_u16(reader)?,
170 };
171 let op = match op {
172 OpType::Copy { src } => {
173 let addr = read_i_varint(reader)?;
174 let len = size;
175 let addr = if src == CopySrc::Dict {
176 *cur_d_addr = diff_addresses_to_u64(*cur_d_addr, addr);
177 *cur_d_addr
178 } else {
179 *cur_o_addr = diff_addresses_to_u64(*cur_o_addr, addr);
180 *cur_o_addr
181 };
182 Op::Copy(Copy{src,addr,len})
183 },
184 OpType::Add => {
185 let mut bytes = vec![0u8;size as usize];
186 if is_interleaved{
187 reader.read_exact(&mut bytes)?;
188 }
189 Op::Add(Add{bytes})
190 },
191 OpType::Run => {
192 Op::Run(Run{len:size as u8,byte:read_u8(reader)?})
193 }
194 };
195 Ok(op)
196}
197
198pub struct SectionIterator<R>{
200 source: R,
201 done:bool,
202 op_buffer: Vec<Op>,
203}
204impl<R: Read> SectionIterator<R>{
205 pub fn new(source: R) -> Self {
206 Self {
207 source,
208 done:false,
209 op_buffer: Vec::new(),
210 }
211 }
212 pub fn next_borrowed(&mut self) -> Option<std::io::Result<(&[Op],SectionHeader)>> {
216 if self.done{
217 return None;
218 }
219 self.op_buffer.clear();
220 let header = match read_section(&mut self.source,&mut self.op_buffer){
221 Ok(v) => v,
222 Err(e) => return Some(Err(e)),
223
224 };
225 if !header.more_sections{
226 self.done = true;
227 }
228 Some(Ok((self.op_buffer.as_slice(),header)))
229 }
230 fn next_owned(&mut self) -> Option<std::io::Result<(Vec<Op>,SectionHeader)>> {
232 if self.done{
233 return None;
234 }
235 let mut op_buffer = Vec::new();
236 let header = match read_section(&mut self.source,&mut op_buffer){
237 Ok(v) => v,
238 Err(e) => return Some(Err(e)),
239
240 };
241 if !header.more_sections{
242 self.done = true;
243 }
244 Some(Ok((op_buffer,header)))
245 }
246 pub fn into_inner(self) -> R {
247 self.source
248 }
249}
250impl<R: Read> Iterator for SectionIterator<R> {
251 type Item = std::io::Result<(Vec<Op>, SectionHeader)>;
252
253 fn next(&mut self) -> Option<Self::Item> {
254 self.next_owned()
255 }
256}
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;

    /// Decodes `encoded` section by section and requires that every section
    /// decodes successfully and matches `expected` exactly, both in content
    /// and in number of sections. A decode error fails the test instead of
    /// silently ending the loop.
    fn assert_decodes_to(encoded: Vec<u8>, expected: &[Vec<Op>]) {
        let mut reader = SectionIterator::new(Cursor::new(encoded));
        let mut remaining = expected.iter();
        while let Some(section) = reader.next_borrowed() {
            let (read_ops, _) = section.expect("section failed to decode");
            let want = remaining.next().expect("decoded more sections than expected");
            assert_eq!(read_ops, want.as_slice());
        }
        assert!(remaining.next().is_none(), "decoded fewer sections than expected");
    }

    #[test]
    fn test_basic_add_run() {
        let ops = vec![
            Op::Add(Add::new("he".as_bytes().to_vec())),
            Op::Run(Run { byte: b'l', len: 2 }),
            Op::Add(Add::new("o".as_bytes().to_vec())),
        ];
        let answer = vec![0, 3, 5, 130, 104, 101, 194, 108, 129, 111];
        assert_decodes_to(answer, &[ops]);
    }

    #[test]
    fn test_hello_micro() {
        let ops = vec![
            Op::Add(Add::new("H".as_bytes().to_vec())),
            Op::Copy(Copy { src: CopySrc::Dict, addr: 1, len: 4 }),
            Op::Add(Add::new("! ".as_bytes().to_vec())),
            Op::Copy(Copy { src: CopySrc::Output, addr: 0, len: 6 }),
        ];
        let answer = vec![0, 4, 13, 129, 72, 4, 2, 130, 33, 32, 70, 0];
        assert_decodes_to(answer, &[ops]);
    }

    #[test]
    pub fn test_hello_win() {
        let ops = [
            vec![Op::Add(Add::new("H".as_bytes().to_vec()))],
            vec![Op::Copy(Copy { src: CopySrc::Dict, addr: 1, len: 4 })],
            vec![Op::Add(Add::new("! ".as_bytes().to_vec()))],
            vec![Op::Copy(Copy { src: CopySrc::Output, addr: 0, len: 6 })],
        ];
        let answer = vec![
            192, 1, 1, 0, 129, 72,
            192, 1, 0, 4, 4, 2,
            192, 1, 2, 0, 130, 33, 32,
            64, 1, 0, 6, 70, 0,
        ];
        assert_decodes_to(answer, &ops);
    }

    #[test]
    pub fn kitchen_sink_transform() {
        let ops = [
            vec![Op::Add(Add::new("H".as_bytes().to_vec()))],
            vec![Op::Copy(Copy { src: CopySrc::Dict, addr: 1, len: 4 })],
            vec![Op::Add(Add::new("! ".as_bytes().to_vec()))],
            vec![Op::Copy(Copy { src: CopySrc::Output, addr: 0, len: 6 })],
            vec![Op::Copy(Copy { src: CopySrc::Output, addr: 6, len: 5 })],
            vec![Op::Run(Run { byte: b'.', len: 3 })],
        ];
        let answer = vec![
            192, 1, 1, 0, 129, 72,
            192, 1, 0, 4, 4, 2,
            192, 1, 2, 0, 130, 33, 32,
            192, 1, 0, 6, 70, 0,
            192, 1, 0, 5, 69, 12,
            64, 1, 0, 3, 195, 46,
        ];
        assert_decodes_to(answer, &ops);
    }
}
472