1use std::{fmt::Debug, io::{Read,Seek,Write}, ops::Range};
2
3use vcdiff_common::{CopyType, Inst, Instruction, WinIndicator, WindowSummary, ADD, COPY, RUN};
4use vcdiff_reader::{read_header, read_window_header, VCDReader, VCDiffReadMsg};
5
6
7
8fn find_dep_ranges(summaries: &[WindowSummary])->Vec<Range<u64>>{
9 let mut ranges = Vec::new();
10 for ws in summaries.iter().rev() {
11 if let WinIndicator::VCD_TARGET = ws.win_indicator {
12 let ssp = ws.source_segment_position.unwrap() as u64;
13 let sss = ws.source_segment_size.unwrap() as u64;
14 ranges.push(ssp..ssp+sss);
15 }
16 }
17 let mut ranges = merge_ranges(ranges);
18 ranges.sort_by(|a,b|b.start.cmp(&a.start));
20 ranges
21}
22pub fn gather_summaries<R: Read + Seek>(patch_data:&mut R)-> std::io::Result<Vec<WindowSummary>>{
24 let header = read_header(patch_data)?;
25 let mut summaries = Vec::new();
26 let mut win_start_pos = header.encoded_size() as u64;
27 while let Ok(ws) = read_window_header(patch_data, win_start_pos) {
28 win_start_pos = ws.end_of_window();
29 summaries.push(ws);
30 patch_data.seek(std::io::SeekFrom::Start(win_start_pos))?;
31 }
32 Ok(summaries)
33}
34fn merge_ranges<T: Ord + Copy>(ranges: Vec<Range<T>>) -> Vec<Range<T>> {
35 let mut result: Vec<Range<T>> = Vec::new();
36 let mut sorted_ranges = ranges;
37
38 sorted_ranges.sort_by(|a, b| a.start.cmp(&b.start));
40
41 for range in sorted_ranges {
43 if result.is_empty() || range.start > result.last().unwrap().end {
46 result.push(range);
47 } else {
48 let last_index = result.len() - 1;
51 result[last_index].end = std::cmp::max(result[last_index].end, range.end);
52 }
53 }
54
55 result
56}
57fn range_overlap<T: Ord + Copy>(range1: &Range<T>, range2: &Range<T>) -> Option<Range<T>> {
58 let start = std::cmp::max(range1.start, range2.start);
59 let end = std::cmp::min(range1.end, range2.end);
60
61 if start < end {
62 Some(Range { start, end })
63 } else {
64 None
65 }
66}
67pub fn apply_patch<R:Read+Seek+Debug,W:Write>(patch:&mut R,mut src:Option<&mut R>,sink:&mut W) -> std::io::Result<()> {
75 let windows = gather_summaries(patch)?;
78 let dependencies = find_dep_ranges(&windows);
79 let mut reader = VCDReader::new(patch)?;
80 let mut ws = None;
81 let mut cur_u = Vec::new();
82 let mut sink_cache = SparseCache::new();
83 let mut o_pos = 0;
84 let mut t_pos = 0;
85
86 loop{
89 let msg = reader.next()?;
90 match msg{
91 VCDiffReadMsg::WindowSummary(w) => {
92 let WindowSummary{
93 win_indicator,
94 source_segment_size,
95 source_segment_position,
96 size_of_the_target_window, ..
97 } = &w;
98 let needed_capacity = match source_segment_size{
99 Some(sss) => {
100 sss + *size_of_the_target_window
101 },
102 _ => *size_of_the_target_window,
103 } as usize;
104 debug_assert!(cur_u.is_empty());
106 if cur_u.capacity() < needed_capacity {
107 cur_u.reserve(needed_capacity - cur_u.capacity());
108 }
109 match (win_indicator,source_segment_position,source_segment_size,src.as_mut()) {
110 (WinIndicator::Neither,_,_,_) => (),
111 (WinIndicator::VCD_SOURCE, Some(ssp), Some(sss),Some(s)) => {
112 s.seek(std::io::SeekFrom::Start(*ssp))?;
113 cur_u.resize(*sss as usize, 0);
114 s.read_exact(&mut cur_u[..*sss as usize])?;
115 },
116 (WinIndicator::VCD_TARGET, Some(ssp), Some(sss),_) => {
117 let slice = sink_cache.get_src_subslice(*ssp..*ssp+*sss);
119 cur_u.extend_from_slice(slice); },
121 _ => panic!("Invalid window configuration"),
122 }
123 ws = Some(w)
124 },
125 VCDiffReadMsg::Inst { first, second, .. } => {
126 for inst in [Some(first),second]{
127 if inst.is_none() {break;}
128 let inst = inst.unwrap();
129 let len_in_o = inst.len_in_o() as usize;
130 match inst {
131 Inst::Add(ADD{ p_pos,.. }) => {
132 let patch_r = reader.get_reader(p_pos)?;
133 let mut slice = vec![0u8;len_in_o];
134 patch_r.read_exact(&mut slice)?;
135 cur_u.append(&mut slice);
136 },
138 Inst::Copy(COPY{ u_pos, len:copy_in_u,copy_type }) => {
139 let u_pos = u_pos as usize;
140 let cur_end = cur_u.len();
142 debug_assert!(u_pos < cur_end,"{:?} >= {:?}",u_pos,cur_end);
144 unsafe{
150 let cur_u_ptr = cur_u.as_mut_ptr();
152 let slice_ptr = cur_u.as_ptr();
153 cur_u.set_len(cur_u.len() + len_in_o);
155
156 let mut amt_copied = 0;
158 while amt_copied < len_in_o {
159 let (copy_len,source_offset) = if matches!(copy_type,CopyType::CopyQ { .. }) {
160 let seq_len = cur_end-u_pos;
161 let copy_len = std::cmp::min(len_in_o - amt_copied, seq_len);
162 let seq_offset = amt_copied % seq_len;
163 (copy_len,seq_offset+u_pos)
164 } else{debug_assert!(amt_copied == 0);
167 (copy_in_u as usize, u_pos as usize)
168 };
169 let dest_offset = cur_end + amt_copied;
171 std::ptr::copy_nonoverlapping(
173 slice_ptr.add(source_offset),
174 cur_u_ptr.add(dest_offset),
175 copy_len
176 );
177 amt_copied += copy_len;
178 }
179 }
180 },
181 Inst::Run(RUN{ byte, .. }) => {
182 cur_u.extend(std::iter::repeat(byte).take(len_in_o));
184 },
185 }
186 t_pos += len_in_o;
187 }
188 },
189 VCDiffReadMsg::EndOfWindow => {
190 let summary = ws.take().unwrap();
197 let t_len = summary.size_of_the_target_window;
198 assert_eq!(t_pos,t_len as usize,"{:?}",summary);
200 let t_start = summary.source_segment_size.unwrap_or(0) as usize;
203 let to_cache = merge_ranges(find_intersections(&(o_pos..o_pos+t_len), &dependencies));
204 for Range { start, end } in to_cache {
205 let len = end-start;
206 sink_cache.add(start, len as usize, &mut std::io::Cursor::new(&cur_u),t_start,o_pos).unwrap();
207 }
208 o_pos += t_len; t_pos = 0;
210 sink.write_all(&cur_u[t_start..t_start+t_len as usize])?;
211 cur_u.clear();
212 },
213 VCDiffReadMsg::EndOfFile => break,
214 }
215 }
217 Ok(())
218}
219
220
221#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
222struct Segment{
223 src_start: u64,
224 buf_start: usize,
225 len: usize,
226}
227
228impl Segment {
229 fn src_end(&self) -> u64 {
230 self.src_start + self.len as u64
231 }
232}
233struct SparseCache
237{
238 buffer: Vec<u8>,
239 segment_map: Vec<Segment>, }
241
242impl SparseCache
243{
244 pub fn new() -> Self {
245 Self {
246 buffer: Vec::new(),
247 segment_map: Vec::new(),
248 }
249 }
250
251 pub fn add<R:Read+Seek>(&mut self, o_pos: u64, len: usize, src: &mut R, t_start:usize, o_start_at_t_start:u64) -> Result<(), std::io::Error> {
252
253 let read_segments = self.prepare_to_add(o_pos, len,t_start,o_start_at_t_start);
254 for ((seg_start, seg_len),buf_pos) in read_segments {
255 src.seek(std::io::SeekFrom::Start(seg_start as u64))?;
256 src.read_exact(&mut self.buffer[buf_pos..buf_pos+seg_len])?;
257 }
258 Ok(())
259 }
260 fn prepare_to_add(&mut self,o_pos: u64, len: usize, t_start:usize, o_start_at_t_start:u64) -> Vec<((u64, usize), usize)> {
262 let mut missing_byte_sections = Vec::new();
263 let end = o_pos + len as u64; let mut current_start = o_pos; let mut new_map = Vec::with_capacity(self.segment_map.len() + 1);
269 let mut read_pos = (current_start - o_start_at_t_start) + t_start as u64;
270 let mut cur_shift = 0;
272 for mut seg in self.segment_map.drain(..) {
273 let segment_end = seg.src_end();
274 let Segment { src_start, buf_start, .. } = &mut seg;
275 if segment_end <= current_start {
276 new_map.push(seg);
277 continue;
279 }
280 if current_start < *src_start && cur_shift == 0{new_map.push(Segment{
282 src_start: o_pos,
283 buf_start:*buf_start,
284 len,
285 });
286 }
287 if *src_start > current_start{
288 let end_pos = std::cmp::min(*src_start, end);
291 let read_len = (end_pos - current_start) as usize;
292 cur_shift += read_len;
293 missing_byte_sections.push(((read_pos, read_len), *buf_start));
296 }
297 *buf_start += cur_shift;
299 new_map.push(seg);
300
301 current_start = std::cmp::max(current_start, segment_end);
303 read_pos = (current_start - o_start_at_t_start) + t_start as u64;
304 if current_start >= end {
305 break;
307 }
308 }
309
310 if current_start < end {
312 if missing_byte_sections.is_empty(){
314 new_map.push(Segment{
315 src_start: o_pos,
316 buf_start:self.buffer.len(),
317 len,
318 });
319 }
320 missing_byte_sections.push(((read_pos, (end - current_start) as usize), self.buffer.len()));
321 }
322 self.segment_map = new_map;
323 self.prepare_buffer(&missing_byte_sections);
324
325 missing_byte_sections
326 }
327 fn prepare_buffer(&mut self, splice_segments: &[((u64, usize),usize)]) {
329 if splice_segments.is_empty() {
330 return;
331 }
332 let total_new_elements: usize = splice_segments.iter().map(|((_, v),_)| v).sum();
334 let cur_size = self.buffer.len();
335 let final_size = cur_size + total_new_elements;
336
337 let mut new_vec = vec![0; final_size];
339
340 let mut old_buf_pos = 0;
342 let mut new_vec_pos = 0;
343 for ((_, insert_len),orig_buf_start) in splice_segments.iter() {
344 if old_buf_pos < cur_size{ let copy_len = orig_buf_start - old_buf_pos;
346 new_vec[new_vec_pos..new_vec_pos+copy_len].copy_from_slice(&self.buffer[old_buf_pos..old_buf_pos+copy_len]);
347 old_buf_pos += copy_len;
348 new_vec_pos += copy_len
349 }
350 new_vec_pos += insert_len; }
353 if old_buf_pos < cur_size {
355 new_vec[new_vec_pos..new_vec_pos+cur_size-old_buf_pos].copy_from_slice(&self.buffer[old_buf_pos..cur_size]);
356 }
357 self.buffer = new_vec;
358
359 }
360
361 pub fn get_src_subslice(&self, src_range: Range<u64>) -> &[u8] {
362 let mut slice_start = None;
365 let mut last_end = 0;
366 for Segment { src_start, buf_start, len } in &self.segment_map {
367 let len = *len as u64;
368 if let Some(in_range) = range_overlap(&(*src_start..*src_start + len), &src_range) {
369 let slice_end = src_start + len;
370 if slice_start.is_some() {
371 if last_end != *src_start {
372 panic!("Non-contiguous segments in the cache");
373 }
374 }else if in_range.start <= src_range.start{
375 slice_start = Some(*buf_start + (src_range.start - src_start) as usize);
376 }
377 if src_range.end <= slice_end {
378 let slice_end = *buf_start + len as usize - (slice_end - src_range.end) as usize;
379 return &self.buffer[slice_start.unwrap()..slice_end];
380 }
381 }
382 last_end = *src_start + len;
383 }
384
385 panic!("The slice is not fully within some existing keys range: {:?}",self.segment_map);
386 }
387}
388
389pub(crate) fn find_intersections<T: Ord + Copy>(reference_set: &Range<T>, test_sets: &[Range<T>]) -> Vec<Range<T>> {
390 let mut intersections = Vec::new();
391
392 for test_set in test_sets {
393 if let Some(overlap) = range_overlap(&reference_set, test_set) {
394 intersections.push(overlap);
395 }
396 }
397
398 intersections
399}
400#[cfg(test)]
424mod tests {
425 use super::*;
426 use std::io::Cursor;
427 #[test]
428 fn test_src_apply(){
429 let mut src = Cursor::new("hello".as_bytes().to_vec());
431
432 let patch = vec![
434 214,195,196,0, 0, 1, 4, 1, 12, 13, 0, 3, 2, 2, 72,33,32, 235, 183, 0,
449 4,
450 ];
451 let mut patch = Cursor::new(patch);
452 let mut sink = Vec::new();
453 apply_patch(&mut patch,Some(&mut src),&mut sink).unwrap();
454 assert_eq!(sink, "Hello! Hello!".as_bytes());
455 }
456 #[test]
457 fn test_complex_apply(){
458 let mut src = Cursor::new("hello".as_bytes().to_vec());
460
461 let patch = vec![
463 214,195,196,0, 0, 0, 7, 1, 0, 1, 1, 0, 72, 2, 1, 4, 1, 8, 5, 0, 1, 1, 1, 33, 253, 0, 2, 6, 0, 9, 7, 0, 1, 2, 1, 32, 2, 118, 0, ];
500 let mut patch = Cursor::new(patch);
501 let mut sink = Vec::new();
502 apply_patch(&mut patch,Some(&mut src),&mut sink).unwrap();
503 assert_eq!(sink, "Hello! Hello!".as_bytes());
504 }
505
506 #[test]
507 fn test_kitchen_sink(){
508 let mut src = Cursor::new("hello".as_bytes().to_vec());
510
511 let patch = vec![
513 214,195,196,0, 0, 0, 7, 1, 0, 1, 1, 0, 72, 2, 1, 4, 1, 8, 5, 0, 1, 1, 1, 33, 253, 0, 2, 6, 0, 9, 7, 0, 1, 2, 1, 32, 2, 118, 0, 2, 5, 6, 12, 8, 0, 1, 4, 2, 46, 117, 2, 35, 3, 0, 1, ];
566 let mut patch = Cursor::new(patch);
567 let mut sink = Vec::new();
568 apply_patch(&mut patch,Some(&mut src),&mut sink).unwrap();
569 assert_eq!(sink, "Hello! Hello! Hell...".as_bytes());
570
571 }
572
573#[test]
574 fn test_kitchen_sink2(){
575 let mut src = Cursor::new("hello world!".as_bytes().to_vec());
577
578 let patch = vec![
580 214,195,196,0, 0, 1, 11, 1, 14, 7, 0, 1, 5, 3, 72, 163, 19, 1, 19, 1, 0, 10, 4, 2, 7, 0, 14, 14, 0, 1, 5, 3, 46, 23, 28, 2, 19, 1, 0, 7, 13, ];
619 let mut patch = Cursor::new(patch);
620 let mut sink = Vec::new();
621 apply_patch(&mut patch,Some(&mut src),&mut sink).unwrap();
622 let str = std::str::from_utf8(&sink).unwrap();
623 assert_eq!(str, "Hello! Hello! Hello. ");
624
625 }
626 #[test]
627 fn test_add_and_retrieve() {
628 let mut cache = SparseCache::new();
629 let mut data = Cursor::new(vec![1, 2, 3, 4]);
630
631 cache.add( 3, 1, &mut data,0,0).unwrap();
633 cache.add( 0, 2, &mut data,0,0).unwrap();
634 cache.add( 2, 2, &mut data,0,0).unwrap();
635
636 let result = cache.get_src_subslice(3..4);
638 assert_eq!(result, &[4]);
639
640 let result = cache.get_src_subslice(0..2);
641 assert_eq!(result, &[1, 2]);
642
643 let result = cache.get_src_subslice(2..4);
644 assert_eq!(result, &[3, 4]);
645 }
646 #[test]
647 fn test_overlapping_segments() {
648 let mut cache = SparseCache::new();
649 let mut data = Cursor::new("hello world".as_bytes());
650
651 cache.add(0, 3, &mut data,0,0).unwrap();
652 cache.add( 6, 5, &mut data,0,0).unwrap();
653
654 assert_eq!(cache.get_src_subslice(0..3), "hel".as_bytes());
655 assert_eq!(cache.get_src_subslice(6..11), "world".as_bytes());
656 }
657 #[test]
658 fn test_get_subslice() {
659 let mut cache = SparseCache::new();
660 let first = "ABCDEFGHIJ".as_bytes();
661 let f_len = first.len(); let mut data = Cursor::new(first);
663
664 cache.add(0, 3, &mut data,0,0).unwrap();
665 let mut data = Cursor::new("KLMNOP".as_bytes());
666
667 cache.add(13, 3, &mut data,0,f_len as u64).unwrap();
668
669 let subslice1 = cache.get_src_subslice( 0..2);
670
671 assert_eq!(subslice1, "AB".as_bytes());
672 let subslice2 = cache.get_src_subslice( 13..15);
673 assert_eq!(subslice2, "NO".as_bytes());
674
675 }
676
677 #[test]
678 fn test_full_overlap() {
679 let reference_set = Range { start: 5, end: 15 };
680 let test_sets = vec![Range { start: 10, end: 12 }];
681 let expected = vec![Range { start: 10, end: 12 }];
682 assert_eq!(find_intersections(&reference_set, &test_sets), expected);
683 }
684
685 #[test]
686 fn test_partial_overlaps() {
687 let reference_set = Range { start: 0, end: 10 };
688 let test_sets = vec![
689 Range { start: 5, end: 15 },
690 Range { start: -2, end: 5 },
691 Range { start: 8, end: 12 },
692 ];
693 let expected = vec![
694 Range { start: 5, end: 10 },
695 Range { start: 0, end: 5 },
696 Range { start: 8, end: 10 },
697 ];
698 assert_eq!(find_intersections(&reference_set, &test_sets), expected);
699 }
700
701 #[test]
702 fn test_no_overlap() {
703 let reference_set = Range { start: 5, end: 10 };
704 let test_sets = vec![
705 Range { start: 0, end: 4 },
706 Range { start: 11, end: 15 },
707 ];
708 let expected: Vec<Range<u64>> = vec![]; assert_eq!(find_intersections(&reference_set, &test_sets), expected);
710 }
711
712 #[test]
713 fn test_empty_test_sets() {
714 let reference_set = Range { start: 1, end: 10 };
715 let test_sets: Vec<Range<u64>> = vec![];
716 let expected: Vec<Range<u64>> = vec![];
717 assert_eq!(find_intersections(&reference_set, &test_sets), expected);
718 }
719}