vcdiff_merger/
lib.rs

1
2
3use std::{fmt::Debug, io::{Read, Seek, Write}, num::NonZeroU32, ops::Range};
4
5use vcdiff_common::{CopyType, Header, Inst, InstType, Instruction, WinIndicator, ADD, COPY, RUN};
6use vcdiff_reader::{VCDReader, VCDiffReadMsg};
7use vcdiff_writer::{VCDWriter, WriteInst, WriteWindowHeader};
8
9
10///Disassociated Copy (from the window it was found in).
11#[derive(Clone, Debug, PartialEq, Eq)]
12pub struct DCopy{
13    pub u_pos:u32,
14    pub len_u:u32,
15    pub sss:u32,
16    pub ssp:u64,
17    pub vcd_trgt:bool,
18    pub copy_type:CopyType,
19}
20impl MergeInst for DCopy{
21    fn skip(&mut self,amt:u32){
22        self.u_pos += amt;
23        self.len_u -= amt;
24        match self.copy_type{
25            CopyType::CopyQ{..} => {
26                panic!("CopyQ should not be skipped!");
27            },
28            _ => {}
29        }
30    }
31    fn trunc(&mut self,amt:u32){
32        self.len_u = self.len_u - amt;
33        match self.copy_type{
34            CopyType::CopyQ{..} => {
35                panic!("CopyQ should not be truncated!");
36            },
37            _ => {}
38        }
39    }
40    fn src_range(&self)->Option<Range<u64>>{
41        let new_ssp = self.ssp() + self.u_start_pos() as u64;
42        let new_end = new_ssp + self.len_in_u() as u64;
43        debug_assert!(new_end <= self.ssp() + self.sss() as u64,
44            "new_end:{} ssp:{} sss:{}",new_end,self.ssp(),self.sss() as u64
45        );
46        Some(new_ssp..new_end)
47    }
48
49}
50impl Instruction for DCopy{
51    fn len_in_u(&self)->u32{
52        self.len_u
53    }
54
55    fn inst_type(&self)->InstType {
56        InstType::Copy(self.copy_type)
57    }
58
59}
60impl DCopy{
61    fn from_copy(copy:COPY,ssp:u64,sss:u32,vcd_trgt:bool)->Self{
62        let COPY { len, u_pos, copy_type } = copy;
63        DCopy{
64            u_pos,
65            len_u:len,
66            sss,
67            ssp,
68            vcd_trgt,
69            copy_type,
70        }
71    }
72    fn u_start_pos(&self)->u32{
73        self.u_pos
74    }
75    fn ssp(&self)->u64{
76        self.ssp
77    }
78    fn sss(&self)->u32{
79        self.sss
80    }
81    fn vcd_trgt(&self)->bool{
82        self.vcd_trgt
83    }
84}
85
86///Extracted Add instruction.
87#[derive(Clone, Debug, Default, PartialEq, Eq)]
88pub struct ExAdd{
89    pub bytes:Vec<u8>,
90}
91impl Instruction for ExAdd{
92    fn len_in_u(&self)->u32{
93        self.bytes.len() as u32
94    }
95    fn inst_type(&self)->InstType {
96        InstType::Add
97    }
98}
99impl MergeInst for ExAdd{
100    fn skip(&mut self,amt:u32){
101        self.bytes = self.bytes.split_off(amt as usize);
102    }
103    fn trunc(&mut self,amt:u32){
104        self.bytes.truncate(self.bytes.len() - amt as usize);
105    }
106    fn src_range(&self)->Option<Range<u64>> {
107        None
108    }
109}
110impl MergeInst for RUN{
111    fn skip(&mut self,amt:u32){
112        self.len -= amt;
113    }
114    fn trunc(&mut self,amt:u32){
115        self.len = self.len - amt;
116    }
117    fn src_range(&self)->Option<Range<u64>> {
118        None
119    }
120}
121
122///Disassociated Instruction.
123#[derive(Clone, Debug, PartialEq, Eq)]
124pub enum DInst{
125    Add(ExAdd),
126    Run(RUN),
127    Copy(DCopy),
128}
129impl Instruction for DInst{
130    fn len_in_u(&self)->u32{
131        match self{
132            DInst::Add(bytes) => bytes.len_in_u(),
133            DInst::Run(run) => run.len,
134            DInst::Copy(copy) => copy.len_in_u(),
135        }
136    }
137    fn inst_type(&self)->InstType{
138        match self{
139            DInst::Add(_) => InstType::Add,
140            DInst::Run(_) => InstType::Run,
141            DInst::Copy(copy) => copy.inst_type(),
142        }
143    }
144}
145impl MergeInst for DInst{
146    fn skip(&mut self,amt:u32){
147        match self{
148            DInst::Add(bytes) => bytes.skip(amt),
149            DInst::Run(run) => run.skip(amt),
150            DInst::Copy(copy) => copy.skip(amt),
151        }
152    }
153    fn trunc(&mut self,amt:u32){
154        match self{
155            DInst::Add(bytes) => bytes.trunc(amt),
156            DInst::Run(run) => run.trunc(amt),
157            DInst::Copy(copy) => copy.trunc(amt),
158        }
159    }
160    fn src_range(&self)->Option<Range<u64>>{
161        match self{
162            DInst::Add(_) => None,
163            DInst::Run(_) => None,
164            DInst::Copy(copy) => copy.src_range(),
165        }
166    }
167}
168impl DInst {
169    pub fn take_copy(self)->Option<DCopy>{
170        match self{
171            DInst::Copy(copy) => Some(copy),
172            _ => None,
173        }
174    }
175    pub fn vcd_trgt(&self)->bool{
176        match self{
177            DInst::Add(_) => false,
178            DInst::Run(_) => false,
179            DInst::Copy(copy) => copy.vcd_trgt(),
180        }
181    }
182}
183
184
185///Extracted Instruction with a starting position.
186#[derive(Clone, Debug, PartialEq, Eq)]
187pub struct SparseInst{
188    pub o_pos_start:u64,
189    pub inst:DInst,
190}
191impl Instruction for SparseInst{
192    fn len_in_u(&self)->u32{
193        self.inst.len_in_u()
194    }
195    fn inst_type(&self)->InstType{
196        self.inst.inst_type()
197    }
198}
199impl MergeInst for SparseInst{
200    fn skip(&mut self,amt:u32){
201        self.o_pos_start += amt as u64;
202        self.inst.skip(amt);
203    }
204    fn trunc(&mut self,amt:u32){
205        self.inst.trunc(amt);
206    }
207
208    fn src_range(&self)->Option<Range<u64>>{
209        self.inst.src_range()
210    }
211}
212impl PosInst for SparseInst{
213    fn o_start(&self)->u64{
214        self.o_pos_start
215    }
216}
217
218pub trait PosInst:MergeInst{
219    fn o_start(&self)->u64;
220}
221pub trait MergeInst:Instruction{
222    ///Shorten the 'front' of the instruction
223    fn skip(&mut self,amt:u32);
224    ///Truncate off the 'back' of the instruction
225    fn trunc(&mut self,amt:u32);
226    ///If this is a Copy, what is the ssp..ssp+sss that would contain exactly this one instruction.
227    fn src_range(&self)->Option<Range<u64>>;
228    fn will_fit_window(&self,max_space_avail:NonZeroU32)->Option<NonZeroU32>{
229        //can we figure out how much to truncate to fit in the space?
230        match self.src_range(){
231            None => {
232                //Add or Run
233                let cur_u_len = self.len_in_u();
234                if cur_u_len <= max_space_avail.into(){
235                    return None;
236                }else{
237                    Some(max_space_avail)
238                }
239            },
240            Some(min) => {//Copy Math
241                //every change in len, also shrinks the sss
242                let cur_s_len = min.end - min.start;
243                let cur_u_len = self.len_in_u() + cur_s_len as u32;
244                if cur_u_len <= max_space_avail.into() {
245                    return None;
246                }else{
247                    let new_len = max_space_avail.get() as u64 - cur_s_len;
248                    if new_len == 0{
249                        None
250                    }else{
251                        Some(NonZeroU32::new(new_len as u32).unwrap())
252                    }
253                }
254            }
255        }
256    }
257    fn split_at(mut self,first_inst_len:u32)->(Self,Self){
258        assert!(!self.is_implicit_seq());
259        let mut second = self.clone();
260        self.trunc(self.len_in_u() - first_inst_len);
261        second.skip(first_inst_len);
262        (self,second)
263    }
264}
265
266
267///Determines the next WinIndicator based on the current instruction type, the current WinIndicator, and the instruction's VCD target status.
268/// Returns None if the new instruction does not change the current WinIndicator.
269/// # Arguments
270/// * `inst_type` - The type of the instruction.
271/// * `cur_ind` - The current WinIndicator.
272/// * `vcd_trgt` - The VCD target status of the instruction.
273pub fn comp_indicator(inst_type: &InstType, cur_ind: &WinIndicator, vcd_trgt: bool) -> Option<WinIndicator> {
274    match (inst_type, cur_ind, vcd_trgt) {
275        (InstType::Copy { .. }, WinIndicator::VCD_SOURCE, true)
276        | (InstType::Copy { .. }, WinIndicator::Neither, true) => Some(WinIndicator::VCD_TARGET),
277        (InstType::Copy { .. }, WinIndicator::VCD_TARGET, false)
278        | (InstType::Copy { .. }, WinIndicator::Neither, false) => Some(WinIndicator::VCD_SOURCE),
279        _ => None,
280    }
281}
282///Finds the index of the instruction that controls the given output position.
283/// # Arguments
284/// * `insts` - The list of instructions to search.
285/// * `o_pos` - The output position to find the controlling instruction for.
286/// # Returns
287/// The index of the controlling instruction, or None if no such instruction exists.
288pub fn find_controlling_inst<I:PosInst>(insts:&[I],o_pos:u64)->Option<usize>{
289    let inst = insts.binary_search_by(|probe|{
290        let end = probe.o_start() + probe.len_in_o() as u64;
291        if (probe.o_start()..end).contains(&o_pos){
292            return std::cmp::Ordering::Equal
293        }else if probe.o_start() > o_pos {
294            return std::cmp::Ordering::Greater
295        }else{
296            return std::cmp::Ordering::Less
297        }
298    });
299    if let Ok(idx) = inst {
300        Some(idx)
301    }else {
302        None
303    }
304}
305
306///Returns a cloned and clipped subslice of instructions that exactly covers the requested output range.
307/// # Arguments
308/// * `instructions` - The list of instructions to extract from.
309/// * `start` - The output position (output byte offset) to start the slice at.
310/// * `len` - The length of the slice in output bytes.
311/// # Returns
312/// A vector containing the cloned and clipped instructions that exactly cover the requested output range.
313/// If the output range is not covered by the instructions, None is returned.
314///
315/// This does not check that the instructions are sequential.
316pub fn get_exact_slice<I:PosInst+Debug>(instructions:&[I],start:u64,len:u32)->Option<Vec<I>>{
317    let start_idx = find_controlling_inst(instructions,start)?;
318    let end_pos = start + len as u64;
319    let mut slice = Vec::new();
320    let mut complete = false;
321
322    for inst in instructions[start_idx..].iter() {
323        let inst_len = inst.len_in_o();
324        let o_start = inst.o_start();
325        let cur_inst_end = o_start + inst_len as u64;
326        let mut cur_inst = inst.clone();
327        if start > o_start {
328            let skip = start - o_start;
329            cur_inst.skip(skip as u32);
330        }
331        if end_pos < cur_inst_end {
332            let trunc = cur_inst_end - end_pos;
333            cur_inst.trunc(trunc as u32);
334        }
335        debug_assert!(cur_inst.len_in_o() > 0, "The instruction length is zero");
336        slice.push(cur_inst);
337
338        if cur_inst_end >= end_pos {
339            complete = true;
340            //debug_assert!(sum_len_in_o(&slice)==len as u64,"{} != {} start:{} end_pos:{} ... {:?} from {:?}",sum_len_in_o(&slice),len,start,end_pos,&slice,instructions);
341            break;
342        }
343    }
344    if !complete {
345        return None;
346    }
347    Some(slice)
348}
349
350//Should maybe move this to Reader?
351///Stats about the patch file.
352#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
353pub struct Stats{
354    pub add_bytes:usize,
355    pub run_bytes:usize,
356    pub copy_bytes:usize,
357    pub add_cnt:usize,
358    pub run_cnt:usize,
359    pub copy_s_cnt:usize,
360    pub copy_t_cnt:usize,
361    pub copy_q_cnt:usize,
362    pub output_size:usize,
363    pub contains_vcd_target:bool,
364}
365
366impl Stats {
367    pub fn new() -> Self {
368        Default::default()
369    }
370    pub fn add(&mut self, len:usize){
371        self.add_bytes += len;
372        self.add_cnt += 1;
373        self.output_size += len;
374    }
375    pub fn run(&mut self, len:usize){
376        self.run_bytes += len;
377        self.run_cnt += 1;
378        self.output_size += len;
379    }
380    pub fn copy_s(&mut self, len:usize){
381        self.copy_bytes += len;
382        self.copy_s_cnt += 1;
383        self.output_size += len;
384    }
385    pub fn copy_t(&mut self, len:usize){
386        self.copy_bytes += len;
387        self.copy_t_cnt += 1;
388        self.output_size += len;
389    }
390    pub fn copy_q(&mut self, len_in_o:usize){
391        self.copy_bytes += len_in_o;
392        self.copy_q_cnt += 1;
393        self.output_size += len_in_o;
394    }
395    pub fn vcd_trgt(&mut self){
396        self.contains_vcd_target = true;
397    }
398    pub fn has_copy(&self)->bool{
399        self.copy_bytes > 0
400    }
401}
402
403///Extracts all instructions from all windows.
404///Memory consumption may be 2-4x the size of the encoded (uncompressed) patch.
405pub fn extract_patch_instructions<R:Read + Seek>(patch:R)->std::io::Result<(Vec<SparseInst>, Stats)>{
406    let mut insts = Vec::new();
407    let mut reader = VCDReader::new(patch)?;
408    let mut ssp = None;
409    let mut sss = None;
410    let mut vcd_trgt = false;
411    let mut o_pos_start = 0;
412    let mut stats = Stats::new();
413    loop{
414        match reader.next()?{
415            VCDiffReadMsg::WindowSummary(ws) => {
416                ssp = ws.source_segment_position;
417                sss = ws.source_segment_size;
418                if ws.win_indicator == WinIndicator::VCD_TARGET{
419                    vcd_trgt = true;
420                    stats.vcd_trgt();
421                }
422            },
423            VCDiffReadMsg::Inst { first, second } => {
424                for inst in [Some(first), second]{
425                    if inst.is_none(){
426                        continue;
427                    }
428                    let inst = inst.unwrap();
429                    let len_o = inst.len_in_o() as usize;
430                    match inst{
431                        Inst::Add(ADD{ len, p_pos }) => {
432                            let mut bytes = vec![0; len as usize];
433                            reader.read_from_src(p_pos, &mut bytes)?;
434                            insts.push(SparseInst{o_pos_start,inst:DInst::Add(ExAdd { bytes })});
435                            stats.add(len_o);
436                        },
437                        Inst::Run(run) => {
438                            stats.run(len_o);
439                            insts.push(SparseInst{o_pos_start,inst:DInst::Run(run)})
440                        },
441                        Inst::Copy(copy) =>{
442                            let ssp = ssp.expect("SSP not set");
443                            let sss = sss.expect("SSS not set");
444                            match copy.copy_type{
445                                CopyType::CopyQ{..} => {
446                                    stats.copy_q(len_o);
447                                },
448                                CopyType::CopyS => {
449                                    stats.copy_s(len_o);
450                                },
451                                CopyType::CopyT{..} => {
452                                    stats.copy_t(len_o);
453                                },
454                            }
455                            insts.push(SparseInst{
456                                o_pos_start,
457                                inst:DInst::Copy(DCopy::from_copy(copy, ssp, sss as u32, vcd_trgt))
458                            });
459                        }
460                    }
461                    o_pos_start += len_o as u64;
462                }
463            },
464            VCDiffReadMsg::EndOfWindow => {
465                ssp = None;
466                sss = None;
467                vcd_trgt = false;
468            },
469            VCDiffReadMsg::EndOfFile => break,
470        }
471    }
472    Ok((insts,stats))
473}
474
475/// This function will dereference all Non-CopySS instructions in the extracted instructions.
476pub fn deref_non_copy_ss(extracted:Vec<SparseInst>)->Vec<SparseInst>{
477    let mut output:Vec<SparseInst> = Vec::with_capacity(extracted.len());
478    let mut cur_o_pos = 0;
479    for SparseInst { inst, .. } in extracted {
480        let (o_start,slice_len,seq_len) = match (inst.inst_type(),inst.vcd_trgt()){
481            (InstType::Copy (CopyType::CopyS),false) |
482            (InstType::Run, _) |
483            (InstType::Add, _) => {
484                let o_pos_start = cur_o_pos;
485                cur_o_pos += inst.len_in_o() as u64;
486                output.push(SparseInst { o_pos_start, inst });
487                continue;
488            },
489            (InstType::Copy (CopyType::CopyQ { len_o }),_) => {
490                let slice_len = inst.len_in_u() - len_o;
491                let o_start = cur_o_pos - slice_len as u64;
492                (o_start,slice_len,len_o)
493            },
494            (InstType::Copy (CopyType::CopyT { inst_u_pos_start }),_) => {
495                let copy = inst.clone().take_copy().unwrap();
496                let offset = inst_u_pos_start - copy.u_pos;
497                let o_start = cur_o_pos - offset as u64;
498                (o_start,copy.len_in_u(),0)
499            },
500            (InstType::Copy (CopyType::CopyS),true) => {
501                let copy = inst.clone().take_copy().unwrap();
502                let o_start = copy.ssp + copy.u_pos as u64;
503                (o_start,copy.len_in_u(),0)
504            },
505
506        };
507        let resolved = get_exact_slice(output.as_slice(), o_start, slice_len).unwrap();
508        if seq_len > 0 {
509            expand_sequence(&resolved, seq_len,&mut cur_o_pos, &mut output);
510        }else{
511            for resolved_inst in resolved {
512                let o_pos_start = cur_o_pos;
513                cur_o_pos += resolved_inst.inst.len_in_o() as u64;
514                output.push(SparseInst { o_pos_start, inst: resolved_inst.inst });
515            }
516        }
517    }
518    output
519}
520
521fn find_copy_s(extract:&[SparseInst],shift:usize,dest:&mut Vec<usize>){
522    for (i,ext) in extract.iter().enumerate(){
523        match (ext.inst_type(),ext.inst.vcd_trgt()){
524            (InstType::Copy (CopyType::CopyS),false) => dest.push(i+shift),
525            _ => (),
526        }
527    }
528}
529
530///Merger struct that can accept merging of additional patches.
531#[derive(Clone, Debug)]
532pub struct Merger{
533    ///The summary patch that will be written to the output.
534    terminal_patch: Vec<SparseInst>,
535    ///If this is empty, merging a patch will have no effect.
536    ///These are where TerminalInst::CopySS are found.
537    terminal_copy_indices: Vec<usize>,
538    //final_size: u64,
539}
540
541impl Merger {
542    ///Creates a new Merger from a terminal patch.
543    ///This should only be called using the patch that generates the output file you want.
544    /// # Arguments
545    /// * `terminal_patch` - The terminal patch that will serve as the core set of instructions.
546    /// # Returns
547    /// If the terminal patch has no Copy instructions, a SummaryPatch is returned.
548    /// If the terminal patch has even a single Copy instructions, a Merger is returned.
549    pub fn new<R:Read + Seek>(terminal_patch:R) -> std::io::Result<Result<Merger,SummaryPatch>> {
550        let (terminal_patch,stats) = extract_patch_instructions(terminal_patch)?;
551        if stats.copy_bytes == 0{
552            return Ok(Err(SummaryPatch(terminal_patch)));
553        }
554        let mut terminal_copy_indices = Vec::new();
555        //we for sure need to translate local. I think translate global isn't needed??
556        //will need to check this.
557        let terminal_patch = deref_non_copy_ss(terminal_patch);
558        find_copy_s(&terminal_patch,0,&mut terminal_copy_indices);
559        debug_assert!(!terminal_copy_indices.is_empty(), "terminal_copy_indices should not be empty");
560        Ok(Ok(Merger{
561            terminal_patch,
562            terminal_copy_indices,
563            //final_size:stats.output_size as u64
564        }))
565    }
566    ///Merges a predecessor patch into the terminal patch.
567    ///This should be called using proper order of patches.
568    /// # Arguments
569    /// * `predecessor_patch` - The patch to merge into the current summary patch.
570    /// # Returns
571    /// If the resulting patch has no Copy instructions, a SummaryPatch is returned.
572    /// If the resulting patch has even a single Copy instructions, a Merger is returned.
573    pub fn merge<R:Read + Seek>(mut self, predecessor_patch:R) -> std::io::Result<Result<Merger,SummaryPatch>> {
574        debug_assert!({
575            let mut x = 0;
576            for inst in self.terminal_patch.iter(){
577                assert_eq!(x,inst.o_pos_start);
578                x += inst.inst.len_in_o() as u64;
579            }
580            true
581        });
582        let (mut predecessor_patch,stats) = extract_patch_instructions(predecessor_patch)?;
583        if stats.has_copy(){
584            predecessor_patch = deref_non_copy_ss(predecessor_patch);
585        }
586        let mut terminal_copy_indices = Vec::with_capacity(self.terminal_copy_indices.len());
587        let mut inserts = Vec::with_capacity(self.terminal_copy_indices.len());
588        let mut shift = 0;
589        for i in self.terminal_copy_indices{
590            let SparseInst { inst,.. } = self.terminal_patch[i].clone();
591            let copy = inst.take_copy().expect("Expected Copy");
592            //this a src window copy that we need to resolve from the predecessor patch.
593            debug_assert!(copy.copy_in_s());
594            debug_assert!(!copy.vcd_trgt());
595            let o_start = copy.ssp + copy.u_pos as u64; //ssp is o_pos, u is offset from that.
596            let resolved = get_exact_slice(&predecessor_patch, o_start, copy.len_in_u()).unwrap();
597            //debug_assert_eq!(sum_len_in_o(&resolved), copy.len_in_o() as u64, "resolved: {:?} copy: {:?}",resolved,copy);
598            find_copy_s(&resolved, i+shift, &mut terminal_copy_indices);
599            shift += resolved.len() - 1;
600            inserts.push((i, resolved));
601
602        }
603        //now we expand the old copy values with the derefd instructions.
604        self.terminal_patch = expand_elements(self.terminal_patch, inserts);
605        //debug_assert_eq!(sum_len_in_o(&self.terminal_patch), self.final_size, "final size: {} sum_len: {}",self.final_size,sum_len_in_o(&self.terminal_patch));
606        if terminal_copy_indices.is_empty(){
607            Ok(Err(SummaryPatch(self.terminal_patch)))
608        }else{
609            self.terminal_copy_indices = terminal_copy_indices;
610            Ok(Ok(self))
611        }
612    }
613    pub fn finish(self)->SummaryPatch{
614        SummaryPatch(self.terminal_patch)
615    }
616
617}
618
619///This is returned when the current summary patch contains no Copy instructions, OR when you are finished with the Merger.
620#[derive(Debug)]
621pub struct SummaryPatch(Vec<SparseInst>);
622impl SummaryPatch{
623    ///Writes the summary patch to a sink.
624    /// # Arguments
625    /// * `sink` - The sink to write the summary patch to.
626    /// * `max_u_size` - The maximum size of the super string U. If None, the default is 256MB. This is used to help determine when new windows are created.
627    /// # Returns
628    /// The sink that was passed in.
629    pub fn write<W:Write>(self,sink:W,max_u_size:Option<usize>)->std::io::Result<W>{
630        let max_u_size = max_u_size.unwrap_or(1<<28); //256MB
631        let header = Header::default();
632        let encoder = VCDWriter::new(sink,header)?;
633        let mut state = WriterState{
634            cur_o_pos: 0,
635            max_u_size,
636            cur_win: Vec::new(),
637            sink: encoder,
638            win_sum: Some(Default::default()),
639        };
640        for inst in self.0.into_iter(){
641            state.apply_instruction(inst.inst)?;
642        }
643        state.flush_window()?;
644        state.sink.finish()
645    }
646}
647
648struct WriterState<W>{
649    cur_o_pos: u64,
650    max_u_size: usize,
651    cur_win: Vec<DInst>,
652    win_sum: Option<WriteWindowHeader>,
653    sink: VCDWriter<W>,
654}
655
656impl<W:Write> WriterState<W> {
657    ///This is the 'terminal' command for moving the merger forward.
658    fn apply_instruction(&mut self, instruction: DInst) ->std::io::Result<()>{
659        //this needs to do several things:
660        //see if we our new instruction is the same win_indicator as our current window
661        //see if our sss/ssp values need to be changed,
662        // if so, will the change make us exceed our MaxWinSize?
663        //   if so, we need to flush the window
664        //   else we just update them
665        self.handle_indicator(&instruction)?;//might flush
666        let mut cur_inst = Some(instruction);
667        while let Some(ci) = cur_inst.take(){
668            let (cur_s,_) = self.cur_win_sizes();
669            let inst_s = ci.src_range();
670            let remaining_size = self.max_u_size as u64 - self.current_u_size() as u64;
671            let remaining_size = if remaining_size == 0{
672                self.flush_window()?;
673                cur_inst = Some(ci);
674                continue;
675            }else{
676                NonZeroU32::new(remaining_size as u32).unwrap()
677            };
678            match (cur_s,inst_s) {
679                (Some((ssp,sss)), Some(r)) => {
680                    if let Some(disjoint) = get_disjoint_range(ssp..ssp+sss, r.clone()){
681                        let disjoint_len = (disjoint.end - disjoint.start) as u32;
682                        //if this is larger than our remaining window, we need to flush, splitting won't help
683                        if disjoint_len > remaining_size.into(){
684                            // println!("flushing (disjoint), cur_o_pos: {} cur_win_size: {} max_u_size: {}", self.cur_o_pos, self.current_window_size(), self.max_u_size);
685                            // println!("sss: {} ssp: {} r: {:?} disjoin_len {:?} remaining {}", sss,ssp,r,disjoint_len,remaining_size);
686                            self.flush_window()?;
687                            //we have invalidated our variables so loop again
688                            cur_inst = Some(ci);
689                            continue;
690                        }
691                    }//else we overlap partially, so splitting would help
692                },
693                //splitting will help the rest of the cases.
694                _ => (),
695            }
696            //if we are here we can naively check if we need to split the inst
697            let split_at = ci.will_fit_window(remaining_size);
698            match split_at {
699                Some(len) =>{
700                    debug_assert!(len.get() < ci.len_in_o() as u32, "split at: {} len: {}",len,ci.len_in_o());
701                    let (first,second) = ci.split_at(len.get());
702                    self.add_to_window(first);
703                    debug_assert!(second.len_in_o()>0, "second len: {}",second.len_in_o());
704                    cur_inst = Some(second);
705                }
706                None => self.add_to_window(ci),
707            }
708        }
709        //only if o_pos == crsr_pos do we check again if we should flush the window.
710        //we give it an extra 5 bytes so we don't truncate down to a inst len of 1;
711        if self.current_window_size() + 5 >= self.max_u_size as usize{
712            // println!("flushing (normal), cur_o_pos: {} cur_win_size: {} max_u_size: {}", self.cur_o_pos, self.current_window_size(), self.max_u_size);
713            self.flush_window()?;
714        }
715        Ok(())
716
717    }
718    fn add_to_window(&mut self,next_inst:DInst){
719        //adjust our current window
720        let (src_range,trgt_win_size) = self.new_win_sizes(&next_inst);
721        let ws = self.win_sum.get_or_insert(Default::default());
722        if let Some((ssp,sss)) = src_range {
723            ws.source_segment_position = Some(ssp);
724            ws.source_segment_size = Some(sss);
725            if ws.win_indicator != WinIndicator::VCD_SOURCE {
726                ws.win_indicator = WinIndicator::VCD_SOURCE;
727            }
728        }
729        ws.size_of_the_target_window = trgt_win_size;
730        self.cur_o_pos += next_inst.len_in_o() as u64;
731        self.cur_win.push(next_inst);
732    }
733    fn handle_indicator(&mut self,inst:&DInst)->std::io::Result<()>{
734        let win_sum = self.win_sum.get_or_insert(Default::default());
735
736        match (win_sum.win_indicator,comp_indicator(&inst.inst_type(),&win_sum.win_indicator,inst.vcd_trgt())){
737            (_, None) => (),
738            (WinIndicator::Neither, Some(set)) => {
739                win_sum.win_indicator = set;
740            },
741            (WinIndicator::VCD_TARGET, Some(next)) |
742            (WinIndicator::VCD_SOURCE, Some(next)) => {
743                self.flush_window()?;
744                let mut h = WriteWindowHeader::default();
745                h.win_indicator = next;
746                self.win_sum = Some(h);
747            },
748        }
749        Ok(())
750    }
751    ///(ssp,sss, size_of_the_target_window (T))
752    fn new_win_sizes(&self,inst:&DInst)->(Option<(u64,u64)>,u64){
753        let src_range = inst.src_range();
754        let ws = self.win_sum.clone().unwrap_or(Default::default());
755        let ss = if let Some(r) = src_range{
756            let ssp = ws.source_segment_position.unwrap_or(r.start);
757            let sss = ws.source_segment_size.unwrap_or(r.end - r.start);
758            let new_r = get_superset(r, ssp..ssp+sss);
759            //first figure out the change in size of our window
760            let new_sss = new_r.end - new_r.start;
761            Some((new_r.start,new_sss))
762        }else{
763            None
764        };
765        (ss,ws.size_of_the_target_window+inst.len_in_o() as u64)
766    }
767    fn cur_win_sizes(&self)->(Option<(u64,u64)>,u64){
768        let ws = self.win_sum.clone().unwrap_or(Default::default());
769        let ss = ws.source_segment_position.map(|start| (start,ws.source_segment_size.unwrap()));
770        (ss,ws.size_of_the_target_window)
771    }
772
773    fn flush_window(&mut self) -> std::io::Result<()> {
774        // Write out the current window
775        let ws = self.win_sum.take().expect("win_sum should be set here");
776        let output_ssp = ws.source_segment_position.clone();
777        self.sink.start_new_win(ws)?;
778        for inst in self.cur_win.drain(..) {
779            match inst {
780                DInst::Add(ExAdd{bytes}) => {
781                    self.sink.next_inst(WriteInst::ADD(bytes)).unwrap();
782                },
783                DInst::Run(r) => {
784                    self.sink.next_inst(WriteInst::RUN(r)).unwrap();
785                },
786                DInst::Copy(DCopy { len_u, u_pos , ssp,copy_type,.. }) => {
787                    //we need to translate this so our u_pos is correct
788                    let output_ssp = output_ssp.expect("output_ssp should be set here");
789                    //our ssp is within (or coincident) to the bounds of the source segment
790                    let copy_inst = if output_ssp > ssp{
791                        let neg_shift = output_ssp - ssp;
792                        COPY { len:len_u, u_pos: u_pos - neg_shift as u32,copy_type }
793                    }else{
794                        let pos_shift = ssp - output_ssp;
795                        COPY { len:len_u, u_pos: u_pos + pos_shift as u32,copy_type }
796                    };
797                    self.sink.next_inst(WriteInst::COPY(copy_inst)).unwrap();
798                },
799            }
800        }
801        Ok(())
802    }
803    fn current_window_size(&self) -> usize {
804        self.win_sum.as_ref().map(|h| h.size_of_the_target_window as usize).unwrap_or(0)
805    }
806    fn current_u_size(&self) -> usize {
807        self.win_sum.as_ref().map(|h| h.size_of_the_target_window as usize + h.source_segment_size.unwrap_or(0) as usize).unwrap_or(0)
808    }
809}
810
811fn expand_sequence(seq:&[SparseInst],len:u32,cur_o_pos:&mut u64,output:&mut Vec<SparseInst>) {
812    let mut current_len = 0;
813    // Calculate the effective length after considering truncation
814    while current_len < len {
815        for SparseInst {  inst, .. } in seq.iter().cloned() {
816            let end_pos = current_len + inst.len_in_o();
817            let mut modified_instruction = inst;
818            // After skip has been accounted for, directly clone and potentially modify for truncation
819            // If adding this instruction would exceed the effective length, apply truncation
820            if end_pos > len {
821                let trunc_amt = end_pos - len;
822                modified_instruction.trunc(trunc_amt);
823            }
824            let inst_len = modified_instruction.len_in_o();
825            output.push(SparseInst { o_pos_start:*cur_o_pos, inst: modified_instruction });
826            current_len += inst_len;
827            *cur_o_pos += inst_len as u64;
828            // If we've reached or exceeded the effective length, break out of the loop
829            if current_len >= len {
830                debug_assert_eq!(current_len, len, "current_len: {} len: {}", current_len, len);
831                break;
832            }
833        }
834    }
835}
836
837
838
839fn get_superset<T: Ord>(range1: Range<T>, range2: Range<T>) -> Range<T> {
840    // Find the minimum start point
841    let start = std::cmp::min(range1.start, range2.start);
842
843    // Find the maximum end point
844    let end = std::cmp::max(range1.end, range2.end);
845
846    // Return the superset range
847    start..end
848}
849
850///Returns the inner disjoint range, if any, between two ranges.
851fn get_disjoint_range<T: Ord + Debug>(range1: Range<T>, range2: Range<T>) -> Option<Range<T>> {
852    use std::cmp::Ordering;
853    match (range1.start.cmp(&range2.start), range1.end.cmp(&range2.end)) {
854        // range1 completely before range2
855        (Ordering::Less, Ordering::Less) => {
856            if range1.end < range2.start {
857                Some(range1.end..range2.start)
858            } else {
859                None
860            }
861        }
862        // range2 completely before range1
863        (Ordering::Greater, Ordering::Greater) => {
864            if range2.end < range1.start {
865                Some(range2.end..range1.start)
866            } else {
867                None
868            }
869        }
870        // Overlapping ranges
871        _ => None,
872    }
873}
874fn expand_elements(mut target: Vec<SparseInst>, inserts: Vec<(usize, Vec<SparseInst>)>) -> Vec<SparseInst>{
875    // Calculate the total number of elements to be inserted to determine the new vector's length.
876    let total_insertions: usize = inserts.iter().map(|(_, ins)| ins.len()).sum();
877    let final_length = target.len() + total_insertions;
878
879    // Allocate a new vector with the final required size.
880    let mut result = Vec::with_capacity(final_length);
881
882    // Sort inserts by position to process them in order.
883    let mut sorted_inserts = inserts;
884    sorted_inserts.sort_by_key(|k| k.0);
885
886    target.reverse();
887    // Trackers for the current position in the original vector and the inserts.
888    let mut cur_idx = 0;
889    let mut cur_o_pos = 0;
890    for (insert_pos, insert_vec) in sorted_inserts {
891        // Copy elements from the current position up to the insert position.
892        while cur_idx < insert_pos {
893            match target.pop() {
894                Some(mut elem) => {
895                    let len = elem.len_in_o();
896                    elem.o_pos_start = cur_o_pos;
897                    cur_o_pos += len as u64;
898                    result.push(elem);
899                    cur_idx += 1;
900                }
901                None => break,
902            }
903        }
904        // Insert the new elements.
905        for mut elem in insert_vec {
906            let len = elem.len_in_o();
907            elem.o_pos_start = cur_o_pos;
908            cur_o_pos += len as u64;
909            result.push(elem);
910        }
911        target.pop();//we get rid of the expanded element.
912        cur_idx += 1;
913    }
914
915    // After processing all inserts, copy any remaining elements from the original vector.
916    while let Some(mut elem) = target.pop() {
917        let len = elem.len_in_o();
918        elem.o_pos_start = cur_o_pos;
919        cur_o_pos += len as u64;
920        result.push(elem);
921    }
922    result
923}
924
925
926#[cfg(test)]
927mod test_super {
928
929    use vcdiff_common::DeltaIndicator;
930    use vcdiff_decoder::apply_patch;
931    use vcdiff_writer::WriteWindowHeader;
932
933    use super::*;
934    /*
935    Basic merger tests will start with a src file of '01234'
936    We will then create a series of patches that will make certain *changes* to the file.
937    That is, we want to be able to apply them in different orders for different effects.
938    To this end, all of the target windows must be the same size.
939    We will pick 10 bytes as our target window size. This is twice the length of 'hello'
940
941    We need to test the following:
942    Sequence unrolling
943    Copy Passthrough
944    Add/Run precedence
945
946    For the seq:
947    We will simply Copy the first byte and then 2 bytes and sequence them to len 10.
948    This should turn '01234' into '0230230230'
949
950    For the copy:
951    We will make a patch that will copy the first five bytes to the last five bytes.
952    This should turn '01234' into '0123401234'
953
954    For the add/run:
955    We will make a patch that will insert 'A' (ADD) at first pos Copy next 2, Then 'XXX'(Run) + 'YZ'(Add) The COpy rem
956    This should turn '01234' into 'A12XXXYZ34'
957
958    Then we do a patch with multiple transforms internally
959    Complex:
960    We will Add 'Y' Run(2) 'Z' CopySeq last in S u_pos 4 len 6 (len in t=3) Copy u_pos 1 len 4
961    This should turn '01234' into 'YZZ4YZ1234'
962
963    We can then mix and match these patches and we should be able to reason about the outputs.
964    */
965    const HDR:Header = Header { hdr_indicator: 0, secondary_compressor_id: None, code_table_data: None };
966    const WIN_HDR:WriteWindowHeader = WriteWindowHeader {
967        win_indicator: WinIndicator::VCD_SOURCE,
968        source_segment_size: Some(5),
969        source_segment_position: Some(0),
970        size_of_the_target_window: 10,
971        delta_indicator: DeltaIndicator(0),
972    };
973    use std::io::Cursor;
974    fn make_patch_reader(patch_bytes:Vec<u8>)->Cursor<Vec<u8>>{
975        Cursor::new(patch_bytes)
976    }
977    fn seq_patch() -> Cursor<Vec<u8>> {
978        let mut encoder = VCDWriter::new(Cursor::new(Vec::new()), HDR).unwrap();
979        encoder.start_new_win(WIN_HDR).unwrap();
980        // Instructions
981        encoder.next_inst(WriteInst::COPY(COPY { len: 1, u_pos: 0,copy_type:CopyType::CopyS })).unwrap();
982        encoder.next_inst(WriteInst::COPY(COPY { len: 2, u_pos: 2,copy_type:CopyType::CopyS  })).unwrap();
983        encoder.next_inst(WriteInst::COPY(COPY { len: 10, u_pos: 5,copy_type:CopyType::CopyQ { len_o:7 }  })).unwrap();
984        // Force encoding
985        let w = encoder.finish().unwrap().into_inner();
986        make_patch_reader(w)
987    }
988    fn copy_patch() -> Cursor<Vec<u8>> {
989        let mut encoder = VCDWriter::new(Cursor::new(Vec::new()), HDR).unwrap();
990        encoder.start_new_win(WIN_HDR).unwrap();
991        // Instructions
992        encoder.next_inst(WriteInst::COPY(COPY { len: 5, u_pos: 0,copy_type:CopyType::CopyS  })).unwrap();
993        encoder.next_inst(WriteInst::COPY(COPY { len: 5, u_pos: 0,copy_type:CopyType::CopyS  })).unwrap();
994        // Force encoding
995        let w = encoder.finish().unwrap().into_inner();
996        make_patch_reader(w)
997    }
998    fn add_run_patch() -> Cursor<Vec<u8>> {
999        let mut encoder = VCDWriter::new(Cursor::new(Vec::new()), HDR).unwrap();
1000        encoder.start_new_win(WIN_HDR).unwrap();
1001        // Instructions
1002        encoder.next_inst(WriteInst::ADD("A".as_bytes().to_vec())).unwrap();
1003        encoder.next_inst(WriteInst::COPY(COPY { len: 2, u_pos: 1,copy_type:CopyType::CopyS  })).unwrap();
1004        encoder.next_inst(WriteInst::RUN(RUN { len: 3, byte: b'X' })).unwrap();
1005        encoder.next_inst(WriteInst::ADD("YZ".as_bytes().to_vec())).unwrap();
1006        encoder.next_inst(WriteInst::COPY(COPY { len: 2, u_pos: 3,copy_type:CopyType::CopyS  })).unwrap();
1007
1008        // Force encoding
1009        let w = encoder.finish().unwrap().into_inner();
1010        make_patch_reader(w)
1011    }
1012    fn complex_patch()->Cursor<Vec<u8>>{
1013        let mut encoder = VCDWriter::new(Cursor::new(Vec::new()), HDR).unwrap();
1014        encoder.start_new_win(WIN_HDR).unwrap();
1015        // Instructions
1016        encoder.next_inst(WriteInst::ADD("Y".as_bytes().to_vec())).unwrap();
1017        encoder.next_inst(WriteInst::RUN(RUN { len: 2, byte: b'Z' })).unwrap();
1018        encoder.next_inst(WriteInst::COPY(COPY { len: 1, u_pos: 4,copy_type:CopyType::CopyS  })).unwrap();
1019        encoder.next_inst(WriteInst::COPY(COPY { len: 2, u_pos: 5,copy_type:CopyType::CopyT { inst_u_pos_start: 9 }  })).unwrap();
1020        encoder.next_inst(WriteInst::COPY(COPY { len: 4, u_pos: 1,copy_type:CopyType::CopyS  })).unwrap();
1021        // Force encoding
1022        let w = encoder.finish().unwrap().into_inner();
1023        make_patch_reader(w)
1024    }
1025    const SRC:&[u8] = b"01234";
1026    #[test]
1027    fn test_copy_seq(){
1028        //01234 Copy-> 0123401234 Seq-> 0230230230
1029        let answer = b"0230230230";
1030        let copy = copy_patch();
1031        let seq = seq_patch();
1032        let merger = Merger::new(seq).unwrap().unwrap();
1033        let merger = merger.merge(copy).unwrap().unwrap();
1034        let merged_patch = merger.finish().write(Vec::new(), None).unwrap();
1035        let mut cursor = Cursor::new(merged_patch);
1036        let mut output = Vec::new();
1037        apply_patch(&mut cursor, Some(&mut Cursor::new(SRC.to_vec())), &mut output).unwrap();
1038        //print output as a string
1039        let as_str = std::str::from_utf8(&output).unwrap();
1040        println!("{}",as_str);
1041        assert_eq!(output,answer);
1042    }
1043    #[test]
1044    fn test_seq_copy(){
1045        //01234 Seq-> 0230230230 Copy-> 0230202302
1046        let answer = b"0230202302";
1047        let copy = copy_patch();
1048        let seq = seq_patch();
1049        let merger = Merger::new(copy).unwrap().unwrap();
1050        let merger = merger.merge(seq).unwrap().unwrap();
1051        let merged_patch = merger.finish().write(Vec::new(), None).unwrap();
1052        let mut cursor = Cursor::new(merged_patch);
1053        let mut output = Vec::new();
1054        apply_patch(&mut cursor, Some(&mut Cursor::new(SRC.to_vec())), &mut output).unwrap();
1055        //print output as a string
1056        let as_str = std::str::from_utf8(&output).unwrap();
1057        println!("{}",as_str);
1058        assert_eq!(output,answer);
1059    }
1060    #[test]
1061    fn test_seq_copy_add(){
1062        //01234 Seq->Copy 0230202302 Add-> A23XXXYZ02
1063        let answer = b"A23XXXYZ02";
1064        let seq = seq_patch();
1065        let copy = copy_patch();
1066        let add_run = add_run_patch();
1067        let merger = Merger::new(add_run).unwrap().unwrap();
1068        let merger = merger.merge(copy).unwrap().unwrap();
1069        let merger = merger.merge(seq).unwrap().unwrap();
1070        let merged_patch = merger.finish().write(Vec::new(), None).unwrap();
1071        let mut cursor = Cursor::new(merged_patch);
1072        let mut output = Vec::new();
1073        apply_patch(&mut cursor, Some(&mut Cursor::new(SRC.to_vec())), &mut output).unwrap();
1074        //print output as a string
1075        let as_str = std::str::from_utf8(&output).unwrap();
1076        println!("{}",as_str);
1077        assert_eq!(output,answer);
1078    }
1079    #[test]
1080    fn test_copy_seq_add(){
1081        //01234 Copy->Seq 0230230230 Add-> A23XXXYZ02
1082        let answer = b"A23XXXYZ02";
1083        let seq = seq_patch();
1084        let copy = copy_patch();
1085        let add_run = add_run_patch();
1086        let merger = Merger::new(add_run).unwrap().unwrap();
1087        let merger = merger.merge(seq).unwrap().unwrap();
1088        let merger = merger.merge(copy).unwrap().unwrap();
1089        let merged_patch = merger.finish().write(Vec::new(), None).unwrap();
1090        let mut cursor = Cursor::new(merged_patch);
1091        let mut output = Vec::new();
1092        apply_patch(&mut cursor, Some(&mut Cursor::new(SRC.to_vec())), &mut output).unwrap();
1093        //print output as a string
1094        let as_str = std::str::from_utf8(&output).unwrap();
1095        println!("{}",as_str);
1096        assert_eq!(output,answer);
1097    }
1098    #[test]
1099    fn test_add_complex(){
1100        //01234 Add-> A12XXXYZ34 Compl YZZXYZ12XX
1101        let answer = b"YZZXYZ12XX";
1102        let add_run = add_run_patch();
1103        let comp = complex_patch();
1104        let merger = Merger::new(comp).unwrap().unwrap();
1105        let merger = merger.merge(add_run).unwrap().unwrap();
1106        let merged_patch = merger.finish().write(Vec::new(), None).unwrap();
1107        let mut cursor = Cursor::new(merged_patch);
1108        let mut output = Vec::new();
1109        apply_patch(&mut cursor, Some(&mut Cursor::new(SRC.to_vec())), &mut output).unwrap();
1110        //print output as a string
1111        let as_str = std::str::from_utf8(&output).unwrap();
1112        println!("{}",as_str);
1113        assert_eq!(output,answer);
1114    }
1115    #[test]
1116    fn test_all_seq(){
1117        //01234 Add-> A12XXXYZ34 Compl YZZXYZ12XX -> Copy YZZXYYZZXY -> Seq YZXYZXYZXY
1118        let answer = b"YZXYZXYZXY";
1119        let add_run = add_run_patch();
1120        let comp = complex_patch();
1121        let copy = copy_patch();
1122        let seq = seq_patch();
1123        let merger = Merger::new(seq).unwrap().unwrap();
1124        let merger = merger.merge(copy).unwrap().unwrap();
1125        let merger = merger.merge(comp).unwrap().unwrap();
1126        let merger = merger.merge(add_run).unwrap().unwrap_err();
1127        let merged_patch = merger.write(Vec::new(), None).unwrap();
1128        let mut cursor = Cursor::new(merged_patch);
1129        let mut output = Vec::new();
1130        //We don't need Src, since the last merge yielded SummaryPatch
1131        apply_patch(&mut cursor, None, &mut output).unwrap();
1132        //print output as a string
1133        let as_str = std::str::from_utf8(&output).unwrap();
1134        println!("{}",as_str);
1135        assert_eq!(output,answer);
1136    }
1137    #[test]
1138    fn test_kitchen_sink(){
1139        //"hello" -> "hello world!" -> "Hello! Hello! Hello. hello. hello..."
1140        //we need to use a series of VCD_TARGET windows and Sequences across multiple patches
1141        //we should use copy/seq excessively since add/run is simple in the code paths.
1142        let src = b"hello!";
1143        let mut encoder = VCDWriter::new(Cursor::new(Vec::new()), HDR).unwrap();
1144        encoder.start_new_win(WriteWindowHeader { win_indicator: WinIndicator::VCD_SOURCE, source_segment_size: Some(5), source_segment_position: Some(0), size_of_the_target_window:5 , delta_indicator: DeltaIndicator(0) }).unwrap();
1145        // Instructions
1146        encoder.next_inst(WriteInst::COPY(COPY { len: 5, u_pos: 0,copy_type:CopyType::CopyS  })).unwrap();
1147        encoder.start_new_win(WriteWindowHeader { win_indicator: WinIndicator::VCD_TARGET, source_segment_size: Some(1), source_segment_position: Some(4), size_of_the_target_window:6 , delta_indicator: DeltaIndicator(0) }).unwrap();
1148        encoder.next_inst(WriteInst::ADD(" w".as_bytes().to_vec())).unwrap();
1149        encoder.next_inst(WriteInst::COPY(COPY { len: 1, u_pos: 0,copy_type:CopyType::CopyS  })).unwrap();
1150        encoder.next_inst(WriteInst::ADD("rld".as_bytes().to_vec())).unwrap();
1151        encoder.start_new_win(WriteWindowHeader { win_indicator: WinIndicator::VCD_SOURCE, source_segment_size: Some(1), source_segment_position: Some(5), size_of_the_target_window:1 , delta_indicator: DeltaIndicator(0) }).unwrap();
1152        encoder.next_inst(WriteInst::COPY(COPY { len: 1, u_pos: 0,copy_type:CopyType::CopyS  })).unwrap();
1153        let p1 = encoder.finish().unwrap().into_inner();
1154        let p1_answer = b"hello world!";
1155        let mut cursor = Cursor::new(p1.clone());
1156        let mut output = Vec::new();
1157        apply_patch(&mut cursor, Some(&mut Cursor::new(src.to_vec())), &mut output).unwrap();
1158        assert_eq!(output,p1_answer); //ensure our instructions do what we think they are.
1159        let patch_1 = make_patch_reader(p1);
1160        let mut encoder = VCDWriter::new(Cursor::new(Vec::new()), HDR).unwrap();
1161        encoder.start_new_win(WriteWindowHeader { win_indicator: WinIndicator::VCD_SOURCE, source_segment_size: Some(11), source_segment_position: Some(1), size_of_the_target_window:7 , delta_indicator: DeltaIndicator(0) }).unwrap();
1162        encoder.next_inst(WriteInst::ADD("H".as_bytes().to_vec())).unwrap();
1163        encoder.next_inst(WriteInst::COPY(COPY { len: 4, u_pos: 0,copy_type:CopyType::CopyS  })).unwrap(); //ello
1164        encoder.next_inst(WriteInst::COPY(COPY { len: 1, u_pos: 10,copy_type:CopyType::CopyS  })).unwrap(); //'!'
1165        encoder.next_inst(WriteInst::COPY(COPY { len: 1, u_pos: 4,copy_type:CopyType::CopyS  })).unwrap(); //' '
1166        encoder.start_new_win(WriteWindowHeader { win_indicator: WinIndicator::VCD_TARGET, source_segment_size: Some(7), source_segment_position: Some(0), size_of_the_target_window:14 , delta_indicator: DeltaIndicator(0) }).unwrap();
1167        encoder.next_inst(WriteInst::COPY(COPY { len: 7, u_pos: 0,copy_type:CopyType::CopyS  })).unwrap(); //'Hello! '
1168        encoder.next_inst(WriteInst::COPY(COPY { len: 12, u_pos: 7,copy_type:CopyType::CopyQ { len_o: 5 }  })).unwrap(); //'Hello! Hello'
1169        encoder.next_inst(WriteInst::ADD(".".as_bytes().to_vec())).unwrap();
1170        encoder.next_inst(WriteInst::COPY(COPY { len: 1, u_pos: 13,copy_type:CopyType::CopyS  })).unwrap(); // ' '
1171        encoder.start_new_win(WriteWindowHeader { win_indicator: WinIndicator::VCD_TARGET, source_segment_size: Some(6), source_segment_position: Some(15), size_of_the_target_window:7, delta_indicator: DeltaIndicator(0) }).unwrap();
1172        encoder.next_inst(WriteInst::ADD("h".as_bytes().to_vec())).unwrap();
1173        encoder.next_inst(WriteInst::COPY(COPY { len: 6, u_pos: 0,copy_type:CopyType::CopyS  })).unwrap(); //'ello. '
1174        encoder.start_new_win(WriteWindowHeader { win_indicator: WinIndicator::VCD_TARGET, source_segment_size: Some(6), source_segment_position: Some(21), size_of_the_target_window:8 , delta_indicator: DeltaIndicator(0) }).unwrap();
1175        encoder.next_inst(WriteInst::COPY(COPY { len: 6, u_pos: 0,copy_type:CopyType::CopyS  })).unwrap(); //'hello.'
1176        encoder.next_inst(WriteInst::COPY(COPY { len: 3, u_pos: 11,copy_type:CopyType::CopyQ { len_o: 2 }  })).unwrap(); //Seq '.' == Run(3) '.'
1177        let p2 = encoder.finish().unwrap().into_inner();
1178        let p2_answer = b"Hello! Hello! Hello. hello. hello...";
1179        let mut cursor = Cursor::new(p2.clone());
1180        let mut output = Vec::new();
1181        apply_patch(&mut cursor, Some(&mut Cursor::new(p1_answer.to_vec())), &mut output).unwrap();
1182        assert_eq!(output,p2_answer);
1183        let patch_2 = make_patch_reader(p2);
1184        let merger = Merger::new(patch_2).unwrap().unwrap();
1185        let merger = merger.merge(patch_1).unwrap().unwrap();
1186        let merged_patch = merger.finish().write(Vec::new(), None).unwrap();
1187        let mut cursor = Cursor::new(merged_patch);
1188        let mut output = Vec::new();
1189        let answer = b"Hello! Hello! Hello. hello. hello...";
1190        apply_patch(&mut cursor, Some(&mut Cursor::new(src.to_vec())), &mut output).unwrap();
1191        //print output as a string
1192        let as_str = std::str::from_utf8(&output).unwrap();
1193        println!("{}",as_str);
1194        assert_eq!(output,answer);
1195    }
1196    #[test]
1197    fn test_disjoint_ranges() {
1198        let range1 = 1..5;
1199        let range2 = 8..12;
1200        let expected_disjoint = Some(5..8);
1201
1202        let result = get_disjoint_range(range1, range2);
1203        assert_eq!(result, expected_disjoint);
1204    }
1205
1206    #[test]
1207    fn test_overlapping_ranges() {
1208        let range1 = 3..9;
1209        let range2 = 7..12;
1210
1211        let result = get_disjoint_range(range1, range2);
1212        assert_eq!(result, None);
1213    }
1214
1215    #[test]
1216    fn test_adjacent_ranges() {
1217        let range1 = 1..5;
1218        let range2 = 5..9;
1219
1220        let result = get_disjoint_range(range1, range2);
1221        assert_eq!(result, None);
1222    }
1223
1224    #[test]
1225    fn test_equal_ranges() {
1226        let range1 = 2..6;
1227        let range2 = 2..6;
1228
1229        let result = get_disjoint_range(range1, range2);
1230        assert_eq!(result, None);
1231    }
1232    #[test]
1233    fn test_get_superset() {
1234        // Test Case 1: range1 supersedes range2
1235        let range1 = 10..20;
1236        let range2 = 12..18;
1237        let expected_superset = 10..20;
1238        let result = get_superset(range1, range2);
1239        assert_eq!(result, expected_superset);
1240
1241        // Test Case 2: range2 supersedes range1
1242        let range1 = 0..2;
1243        let range2 = 7..10;
1244        let expected_superset = 0..10;
1245        let result = get_superset(range1, range2);
1246        assert_eq!(result, expected_superset);
1247
1248        // Test Case 3: Overlapping ranges
1249        let range1 = 0..15;
1250        let range2 = 10..20;
1251        let expected_superset = 0..20;
1252        let result = get_superset(range1, range2);
1253        assert_eq!(result, expected_superset);
1254
1255    }
1256    #[test]
1257    fn test_expand_seq(){
1258        let seq = vec![
1259            SparseInst { o_pos_start: 0, inst: DInst::Copy(DCopy { u_pos: 0, len_u: 2, sss: 0, ssp: 0, vcd_trgt: false, copy_type: CopyType::CopyS }) },
1260            SparseInst { o_pos_start: 2, inst: DInst::Copy(DCopy { u_pos: 4, len_u: 6, sss: 0, ssp: 0, vcd_trgt: false, copy_type: CopyType::CopyS }) },
1261            SparseInst { o_pos_start: 8, inst: DInst::Copy(DCopy { u_pos: 12, len_u: 4, sss: 0, ssp: 0, vcd_trgt: false, copy_type: CopyType::CopyS }) },
1262        ];
1263        let mut output = Vec::new();
1264        expand_sequence(&seq, 15, &mut 0,&mut output);
1265        let result = vec![
1266            SparseInst { o_pos_start: 0, inst: DInst::Copy(DCopy { u_pos: 0, len_u: 2, sss: 0, ssp: 0, vcd_trgt: false, copy_type: CopyType::CopyS }) },
1267            SparseInst { o_pos_start: 2, inst: DInst::Copy(DCopy { u_pos: 4, len_u: 6, sss: 0, ssp: 0, vcd_trgt: false, copy_type: CopyType::CopyS }) },
1268            SparseInst { o_pos_start: 8, inst: DInst::Copy(DCopy { u_pos: 12, len_u: 4, sss: 0, ssp: 0, vcd_trgt: false, copy_type: CopyType::CopyS }) },
1269            SparseInst { o_pos_start: 12, inst: DInst::Copy(DCopy { u_pos: 0, len_u: 2, sss: 0, ssp: 0, vcd_trgt: false, copy_type: CopyType::CopyS }) },
1270            SparseInst { o_pos_start: 14, inst: DInst::Copy(DCopy { u_pos: 4, len_u: 1, sss: 0, ssp: 0, vcd_trgt: false, copy_type: CopyType::CopyS }) },
1271        ];
1272        assert_eq!(output, result, "Output should contain a truncated instruction");
1273    }
1274}