1
2
3use std::io::{self, Read, Seek};
4
5use vcdiff_common::{decode_integer, Cache, CodeTableData, CodeTableEntry, CopyType, DeltaIndicator, Header, Inst, Instruction, TableInst, WinIndicator, WindowSummary, ADD, COPY, MAGIC, RUN, VCD_C_TABLE};
6
/// Where the reader currently is within the patch stream.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum VCDiffState {
    /// A window header was just parsed; none of its instructions have been read.
    Window {
        /// Absolute offset of the window's instructions section.
        inst_sec_start: u64,
        /// Length of the instructions section (offset of the addresses
        /// section relative to `inst_sec_start`).
        addr_sec_start: u32,
        /// Length of the addresses section (end of window relative to the
        /// start of the addresses section).
        end_of_window: u32,
        /// Source segment size of the window, `0` when it has none.
        sss: u64,
    },
    /// Opcodes of the current window are being read.
    Instructions {
        /// Absolute offset of the addresses section, i.e. the first byte past
        /// the instructions section.
        addr_sec_start: u64,
        /// Length of the addresses section.
        end_of_window: u32,
        /// Source segment size of the window, `0` when it has none.
        sss: u64,
    },
    /// Between windows: the next read attempts to parse a window header.
    EoW,
    /// The input is exhausted; every further read reports end of file.
    EoF,
}
14
15#[derive(Debug)]
17pub struct VCDReader<R> {
18 source: R,
19 pub header: Header,
20 caches: Cache,
21 cur_state: VCDiffState,
22 cur_pos: u64,
23 moved: bool,
24 addr_pos: u64,
25 data_pos: u64,
27 cur_u_position: u32,
30}
31
32impl<R: Read + Seek> VCDReader<R> {
33 pub fn new(mut source: R) -> io::Result<Self> {
38 source.seek(io::SeekFrom::Start(0))?;
40 let header = read_header(&mut source)?;
41 if let Some(_) = header.code_table_data.as_ref(){
42 unimplemented!("Application-Defined Code Tables are not supported.")
43 }
44 if let Some(_) = header.secondary_compressor_id{
45 unimplemented!("Secondary compressors are not supported.")
46 }
47 Ok(Self {
48 source,
49 cur_pos:header.encoded_size() as u64,
50 header,
51 cur_state: VCDiffState::EoW,
52 moved: false,
53 caches: Cache::new(),
54 addr_pos: 0,
55 data_pos: 0,
56 cur_u_position: 0,
57
58 })
59 }
60 pub fn into_inner(self)->R{
62 self.source
63 }
64 pub fn seek_to_window(&mut self,win_start_pos:u64){
70 self.cur_pos = win_start_pos;
71 self.cur_state = VCDiffState::EoW;
72 self.moved = true; }
74 pub fn read_from_src(&mut self,from_start:u64, buf:&mut [u8])->io::Result<()>{
80 self.get_reader(from_start)?.read_exact(buf)
81 }
82 pub fn get_reader(&mut self,at_from_start:u64)->io::Result<&mut R>{
87 self.moved = true;
88 self.source.seek(io::SeekFrom::Start(at_from_start))?;
89 Ok(&mut self.source)
90 }
91 fn resume(&mut self)->std::io::Result<()>{
94 if self.moved {self.source.seek(io::SeekFrom::Start(self.cur_pos))?;}
95 Ok(())
96 }
97
98 fn read_as_inst(&mut self)->io::Result<Option<VCDiffReadMsg>>{
99 self.resume()?;
100 if let VCDiffState::Instructions { addr_sec_start, end_of_window,.. } = &self.cur_state {
101 if self.cur_pos >= *addr_sec_start{
102 let pos = addr_sec_start + *end_of_window as u64;
104 self.cur_pos = self.source.seek(io::SeekFrom::Start(pos))?;
105 self.cur_state = VCDiffState::EoW;
106 return Ok(Some(VCDiffReadMsg::EndOfWindow));
107 }
108 let mut buffer = [0; 1];
109 self.source.read_exact(&mut buffer)?;
110 self.cur_pos += 1;
111 Ok(Some(self.decode_inst(buffer[0])?))
112 }else{
113 Ok(None)
114 }
115 }
116 fn decode_inst(&mut self,byte:u8)->io::Result<VCDiffReadMsg>{
117 debug_assert!(matches!(self.cur_state,VCDiffState::Instructions{..}));
118 let CodeTableEntry{ first, second } = VCD_C_TABLE[byte as usize];
119 let f = if let Some(inst) = self.handle_inst(first)? {
120 inst
121 }else{
122 panic!("NoOp is not allowed in the first position of an opcode"); };
124 let s = self.handle_inst(second)?;
125
126 Ok(VCDiffReadMsg::Inst{first: f, second: s})
127
128 }
129 fn handle_inst(&mut self, inst: TableInst) -> std::io::Result<Option<Inst>> {
130 let inst = match inst{
131 TableInst::Run => {
132 let len = self.read_as_inst_size_unchecked()? as u32;
134 let mut byte = [0u8];
135 self.read_from_src(self.data_pos, &mut byte)?;
136 self.data_pos += 1;
137 Inst::Run(RUN { len: len as u32, byte: byte[0] })
138 },
139 TableInst::Add { size } => {
140 let len = if size == 0 {
141 self.read_as_inst_size_unchecked()? as u32
142 }else{size as u32};
143 let pos = self.data_pos;
144 self.data_pos += len as u64;
145 Inst::Add(ADD { len: len as u32, p_pos: pos })
146 },
147 TableInst::Copy { size, mode } => {
148 let len = if size == 0 {
149 self.read_as_inst_size_unchecked()? as u32
150 }else{size as u32};
151 let (value,read) = if mode < Cache::SAME_START as u8 {
152 decode_integer(self.get_reader(self.addr_pos)?)?
154 }else{
155 let mut byte = [0u8];
157 self.read_from_src(self.addr_pos, &mut byte)?;
158 (byte[0] as u64, 1)
159 };
160 let addr = self.caches.addr_decode(value, self.cur_u_position as u64, mode as usize) as u32;
161 if addr >= self.cur_u_position{
162 return Err(std::io::Error::new(
163 std::io::ErrorKind::InvalidData,
164 format!("Address is out of bounds: {:?} >= {:?} (mode: {} len: {}) encoded {} ",
165 addr, self.cur_u_position,mode,len,value))
166 );
167 }
168 self.addr_pos += read as u64;
169 let sss = match self.cur_state{
170 VCDiffState::Instructions{sss,..} => sss,
171 _ => panic!("Invalid State!"),
172 };
173 let end_pos = addr + len;
174 let copy_type = if end_pos > self.cur_u_position{if (addr as u64) < sss {
177 return Err(std::io::Error::new(
178 std::io::ErrorKind::InvalidData,
179 format!("CopyT (Sequence) must be entirely in T! Position in U: {} Copy Address: {} Copy Len: {}, End Position in U: {}, Source Segment Size: {}",
180 self.cur_u_position, addr,len,end_pos, sss
181 )
182 ));
183 }
184 let len_o = (end_pos - self.cur_u_position) as u32;
185 CopyType::CopyQ{len_o}
186 }else if end_pos as u64 <= sss{CopyType::CopyS
188 }else{if (addr as u64) < sss {
190 return Err(std::io::Error::new(
191 std::io::ErrorKind::InvalidData,
192 format!("CopyT (Non-Sequence) must be entirely in T! Position in U: {} Copy Address: {} Copy Len: {}, End Position in U: {}, Source Segment Size: {}",
193 self.cur_u_position, addr,len,end_pos, sss
194 )
195 ));
196 }
197 CopyType::CopyT{inst_u_pos_start:self.cur_u_position}
198 };
199 Inst::Copy(COPY { len: len as u32, u_pos: addr,copy_type })
200 },
201 TableInst::NoOp => return Ok(None),
202 };
203 self.cur_u_position += inst.len_in_o();
204 Ok(Some(inst))
205 }
206 fn read_as_inst_size_unchecked(&mut self)->io::Result<u64>{
210 self.resume()?;
211 let (integer,len) = decode_integer(&mut self.source)?;
212 self.cur_pos += len as u64;
213 Ok(integer)
214 }
215 pub fn next(&mut self) -> io::Result<VCDiffReadMsg> {
217 match self.cur_state {
218 VCDiffState::Window { inst_sec_start, addr_sec_start, end_of_window,sss } => {
219 self.cur_pos = self.source.seek(io::SeekFrom::Start(inst_sec_start))?;
224 assert!(addr_sec_start> 0);
226 let a = inst_sec_start + addr_sec_start as u64;
227 self.cur_state = VCDiffState::Instructions { addr_sec_start:a, end_of_window,sss };
228 match self.read_as_inst()? {
229 Some(v) => Ok(v),
230 None => {
231 panic!("Instructions state should always have a next opcode");
233 }
234 }
235 },
236 VCDiffState::Instructions { .. } => {
237 match self.read_as_inst() {
238 Ok(Some(v)) => Ok(v),
239 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
240 self.cur_state = VCDiffState::EoF;
241 Ok(VCDiffReadMsg::EndOfFile)
242 }
243 Err(e) => Err(e),
244 Ok(None) => {
245 panic!("Instructions state should always have a next opcode");
247 }
248 }
249 },
250 VCDiffState::EoW => {
251 self.resume()?;
252 match read_window_header(&mut self.source,self.cur_pos) {
253 Ok(ws) => {
254 self.cur_pos += ws.win_hdr_len() as u64;
255 self.cur_state = VCDiffState::Window {
256 inst_sec_start: ws.inst_sec_start(),
257 addr_sec_start: (ws.addr_sec_start() - ws.inst_sec_start()) as u32,
258 end_of_window: (ws.end_of_window() - ws.addr_sec_start()) as u32,
259 sss: ws.source_segment_size.unwrap_or(0),
260 };
261 self.addr_pos = ws.addr_sec_start();
262 self.data_pos = ws.data_sec_start();
263 self.cur_u_position = ws.source_segment_size.unwrap_or(0) as u32;
264 self.caches = Cache::new();
265 Ok(VCDiffReadMsg::WindowSummary(ws))
266 }
267 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
268 self.cur_state = VCDiffState::EoF;
269 Ok(VCDiffReadMsg::EndOfFile)
270 }
271 Err(e) => return Err(e),
272 }
273 },
274 VCDiffState::EoF => Ok(VCDiffReadMsg::EndOfFile),
275 }
276 }
277}
278
279pub fn read_header<R: Read>(source: &mut R) -> io::Result<Header> {
281 let mut buffer = [0; 4]; source.read_exact(&mut buffer)?;
283 if buffer != MAGIC {
285 return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid VCDiff header"));
286 }
287
288 let mut byte_buffer = [0; 1];
290 source.read_exact(&mut byte_buffer)?;
291 let hdr_indicator = byte_buffer[0];
292
293 let has_secondary_compressor = hdr_indicator & 0x01 != 0;
295 let has_code_table = hdr_indicator & 0x02 != 0;
296
297 let secondary_compressor_id = if has_secondary_compressor {
298 source.read_exact(&mut byte_buffer)?;
299 Some(byte_buffer[0]) } else {
301 None
302 };
303
304 let code_table_data = if has_code_table {
305 source.read_exact(&mut byte_buffer)?;
306 let size_of_near_cache = byte_buffer[0];
307 source.read_exact(&mut byte_buffer)?;
308 let size_of_same_cache = byte_buffer[0];
309 let (length_of_compressed_code_table,_) = decode_integer(source)?;
310 let mut compressed_code_table_data = vec![0u8; length_of_compressed_code_table as usize];
311 source.read_exact(&mut compressed_code_table_data)?;
312
313 Some(CodeTableData {
314 size_of_near_cache,
315 size_of_same_cache,
316 compressed_code_table_data,
317 })
318 } else {
319 None
320 };
321
322 Ok(Header {
323 hdr_indicator,
324 secondary_compressor_id,
325 code_table_data,
326 })
327}
328pub fn read_window_header<R: Read>(source: &mut R, win_start_pos: u64) -> io::Result<WindowSummary>{
335 let mut buffer = [0; 1];
336 source.read_exact(&mut buffer)?;
337 let win_indicator = WinIndicator::from_u8(buffer[0]);
338 let (source_segment_size,source_segment_position) = match win_indicator {
339 WinIndicator::VCD_SOURCE |
340 WinIndicator::VCD_TARGET => {
341 let (sss,_) = decode_integer(source)?;
342 let (ssp,_) = decode_integer(source)?;
343 (Some(sss), Some(ssp))
344 },
345 WinIndicator::Neither => (None,None),
346 };
347
348 let (length_of_the_delta_encoding,_) = decode_integer(source)?;
349 let (size_of_the_target_window,_) = decode_integer(source)?;
350 source.read_exact(&mut buffer)?;
351 let delta_indicator = DeltaIndicator::from_u8(buffer[0]);
352 let (length_of_data_for_adds_and_runs,_) = decode_integer(source)?;
353 let (length_of_instructions_and_sizes,_) = decode_integer(source)?;
354 let (length_of_addresses_for_copys,_) = decode_integer(source)?;
355 Ok(WindowSummary{
356 win_start_pos,
357 win_indicator,
358 source_segment_size,
359 source_segment_position,
360 length_of_the_delta_encoding,
361 size_of_the_target_window,
362 delta_indicator,
363 length_of_data_for_adds_and_runs,
364 length_of_instructions_and_sizes,
365 length_of_addresses_for_copys,
366 })
367
368}
369#[derive(Clone, Debug, PartialEq, Eq)]
371pub enum VCDiffReadMsg {
372 WindowSummary(WindowSummary),
373 Inst{first: Inst, second: Option<Inst>},
374 EndOfWindow,
375 EndOfFile,
376}
377
#[cfg(test)]
mod test_super {
    use vcdiff_common::{DeltaIndicator, WinIndicator, WindowSummary};

    use super::*;

    /// A single-window patch used by the header tests.
    const TEST_PATCH: [u8; 42] = [
        // file header: magic bytes, then the header indicator
        214, 195, 196, 0,
        0,
        // window header (9 bytes, starting at offset 5)
        1, 4, 0, 33, 28, 0, 24, 3, 1,
        // data section (24 bytes, starting at offset 14)
        119, 120, 121, 122,
        101, 102, 103, 104, 101, 102, 103, 104,
        101, 102, 103, 104, 101, 102, 103, 104,
        122, 122, 122, 122,
        // instructions section (3 bytes, starting at offset 38)
        20, 1, 24,
        // addresses section (1 byte, starting at offset 41)
        0,
    ];

    /// The window summary expected when parsing `TEST_PATCH`.
    const TEST_WINDOW: WindowSummary = WindowSummary {
        win_start_pos: 5,
        win_indicator: WinIndicator::VCD_SOURCE,
        source_segment_size: Some(4),
        source_segment_position: Some(0),
        length_of_the_delta_encoding: 33,
        size_of_the_target_window: 28,
        delta_indicator: DeltaIndicator(0),
        length_of_data_for_adds_and_runs: 24,
        length_of_instructions_and_sizes: 3,
        length_of_addresses_for_copys: 1,
    };

    /// Pulls one message from `dec` per expected entry and checks that they
    /// match, in order.
    fn expect_messages<R: std::io::Read + std::io::Seek>(
        dec: &mut VCDReader<R>,
        expected: impl IntoIterator<Item = VCDiffReadMsg>,
    ) {
        for check in expected {
            let msg = dec.next().unwrap();
            assert_eq!(msg, check, "{:?} != {:?}", msg, check);
        }
    }

    #[test]
    fn test_header() {
        let mut cursor = std::io::Cursor::new(&TEST_PATCH);
        let parsed = read_header(&mut cursor).unwrap();
        assert_eq!(parsed.hdr_indicator, 0);
        assert_eq!(parsed.secondary_compressor_id, None);
        assert_eq!(parsed.code_table_data, None);
        assert_eq!(parsed.encoded_size(), 5);
    }

    #[test]
    fn test_win_header() {
        let mut cursor = std::io::Cursor::new(&TEST_PATCH);
        let parsed = read_header(&mut cursor).unwrap();
        let window_offset = parsed.encoded_size() as u64;
        let summary = read_window_header(&mut cursor, window_offset).unwrap();
        assert_eq!(summary, TEST_WINDOW);
        assert_eq!(summary.win_hdr_len(), 9);
        assert_eq!(summary.data_sec_start(), 14);
        assert_eq!(summary.inst_sec_start(), 38);
        assert_eq!(summary.addr_sec_start(), 41);
        assert_eq!(summary.end_of_window(), 42);
    }

    #[test]
    fn test_decode() {
        let expected = [
            VCDiffReadMsg::WindowSummary(WindowSummary {
                source_segment_size: Some(4),
                source_segment_position: Some(1),
                length_of_the_delta_encoding: 12,
                size_of_the_target_window: 13,
                delta_indicator: DeltaIndicator::from_u8(0),
                length_of_data_for_adds_and_runs: 3,
                length_of_instructions_and_sizes: 2,
                length_of_addresses_for_copys: 2,
                win_start_pos: 5,
                win_indicator: WinIndicator::VCD_SOURCE,
            }),
            VCDiffReadMsg::Inst {
                first: Inst::Add(ADD { len: 1, p_pos: 14 }),
                second: Some(Inst::Copy(COPY { len: 4, u_pos: 0, copy_type: CopyType::CopyS })),
            },
            VCDiffReadMsg::Inst {
                first: Inst::Add(ADD { len: 2, p_pos: 15 }),
                second: Some(Inst::Copy(COPY {
                    len: 6,
                    u_pos: 4,
                    copy_type: CopyType::CopyT { inst_u_pos_start: 11 },
                })),
            },
            VCDiffReadMsg::EndOfWindow,
            VCDiffReadMsg::EndOfFile,
        ];
        let bytes = vec![
            // file header
            214, 195, 196, 0, 0,
            // window header
            1, 4, 1, 12, 13, 0, 3, 2, 2,
            // data section
            72, 33, 32,
            // instructions section
            163, 183,
            // addresses section
            0, 4,
        ];
        let mut dec = VCDReader::new(std::io::Cursor::new(&bytes)).unwrap();
        expect_messages(&mut dec, expected);
    }

    #[test]
    fn test_seq() {
        let bytes = vec![
            // file header
            214, 195, 196, 0, 0,
            // window header
            0, 13, 8, 0, 5, 2, 1,
            // data section
            116, 101, 114, 115, 116,
            // instructions section
            200, 3,
            // addresses section
            1,
        ];
        let mut dec = VCDReader::new(std::io::Cursor::new(bytes)).unwrap();
        let expected = [
            VCDiffReadMsg::WindowSummary(WindowSummary {
                source_segment_size: None,
                source_segment_position: None,
                length_of_the_delta_encoding: 13,
                size_of_the_target_window: 8,
                delta_indicator: DeltaIndicator::from_u8(0),
                length_of_data_for_adds_and_runs: 5,
                length_of_instructions_and_sizes: 2,
                length_of_addresses_for_copys: 1,
                win_start_pos: 5,
                win_indicator: WinIndicator::Neither,
            }),
            VCDiffReadMsg::Inst {
                first: Inst::Add(ADD { len: 3, p_pos: 12 }),
                second: Some(Inst::Copy(COPY {
                    len: 5,
                    u_pos: 1,
                    copy_type: CopyType::CopyQ { len_o: 3 },
                })),
            },
            VCDiffReadMsg::Inst { first: Inst::Add(ADD { len: 2, p_pos: 15 }), second: None },
            VCDiffReadMsg::EndOfWindow,
            VCDiffReadMsg::EndOfFile,
        ];
        expect_messages(&mut dec, expected);
    }

    #[test]
    fn kitchen_sink_transform2() {
        let bytes = vec![
            // file header
            214, 195, 196, 0, 0,
            // first window (offset 5): header, data, instructions, addresses
            1, 11, 1, 14, 7, 0, 1, 5, 3,
            72,
            163, 19, 1, 19, 1,
            0, 10, 4,
            // second window (offset 23): header, data, instructions, addresses
            2, 7, 0, 14, 14, 0, 1, 5, 3,
            46,
            23, 28, 2, 19, 1,
            0, 7, 13,
        ];
        use CopyType::*;
        let expected = [
            VCDiffReadMsg::WindowSummary(WindowSummary {
                win_indicator: WinIndicator::VCD_SOURCE,
                source_segment_size: Some(11),
                source_segment_position: Some(1),
                size_of_the_target_window: 7,
                delta_indicator: DeltaIndicator(0),
                length_of_the_delta_encoding: 14,
                length_of_data_for_adds_and_runs: 1,
                length_of_instructions_and_sizes: 5,
                length_of_addresses_for_copys: 3,
                win_start_pos: 5,
            }),
            VCDiffReadMsg::Inst {
                first: Inst::Add(ADD { len: 1, p_pos: 14 }),
                second: Some(Inst::Copy(COPY { len: 4, u_pos: 0, copy_type: CopyType::CopyS })),
            },
            VCDiffReadMsg::Inst {
                first: Inst::Copy(COPY { len: 1, u_pos: 10, copy_type: CopyS }),
                second: None,
            },
            VCDiffReadMsg::Inst {
                first: Inst::Copy(COPY { len: 1, u_pos: 4, copy_type: CopyS }),
                second: None,
            },
            VCDiffReadMsg::EndOfWindow,
            VCDiffReadMsg::WindowSummary(WindowSummary {
                win_start_pos: 23,
                win_indicator: WinIndicator::VCD_TARGET,
                source_segment_size: Some(7),
                source_segment_position: Some(0),
                length_of_the_delta_encoding: 14,
                size_of_the_target_window: 14,
                delta_indicator: DeltaIndicator(0),
                length_of_data_for_adds_and_runs: 1,
                length_of_instructions_and_sizes: 5,
                length_of_addresses_for_copys: 3,
            }),
            VCDiffReadMsg::Inst {
                first: Inst::Copy(COPY { len: 7, u_pos: 0, copy_type: CopyS }),
                second: None,
            },
            VCDiffReadMsg::Inst {
                first: Inst::Copy(COPY { len: 12, u_pos: 7, copy_type: CopyQ { len_o: 5 } }),
                second: None,
            },
            VCDiffReadMsg::Inst { first: Inst::Add(ADD { len: 1, p_pos: 32 }), second: None },
            VCDiffReadMsg::Inst {
                first: Inst::Copy(COPY {
                    len: 1,
                    u_pos: 13,
                    copy_type: CopyT { inst_u_pos_start: 20 },
                }),
                second: None,
            },
            VCDiffReadMsg::EndOfWindow,
            VCDiffReadMsg::EndOfFile,
        ];
        let mut dec = VCDReader::new(std::io::Cursor::new(bytes)).unwrap();
        expect_messages(&mut dec, expected);
    }
}