libpijul_compat/optimal_diff/
mod.rs

1use backend::*;
2use graph;
3use graph::Graph;
4use patch;
5use patch::{Change, ChangeContext, Record};
6use {GenericTxn, Result};
7
8use conflict;
9use rand;
10use sanakirja::value::Value;
11use std;
12use std::cell::RefCell;
13use std::collections::HashMap;
14use std::path::PathBuf;
15use std::rc::Rc;
16use std::cmp::min;
17
18mod add;
19mod delete;
20
21#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
22enum Status {
23    Begin,
24    Separator,
25    End,
26}
27
28#[doc(hidden)]
29pub(crate) struct Diff<'a, 'b, T: 'a> {
30    lines_a: Vec<Key<PatchId>>,
31    contents_a: Vec<Value<'a, T>>,
32    conflicts_ancestors: HashMap<usize, usize>,
33    conflicts_descendants: HashMap<usize, usize>,
34    conflicts_sides: HashMap<usize, Vec<usize>>,
35    conflicts_down_contexts: HashMap<usize, (LineId, Rc<RefCell<Vec<Key<Option<PatchId>>>>>)>,
36    current_conflict_ancestor: Vec<usize>,
37    status: HashMap<usize, Status>,
38    inode: Key<Option<Hash>>,
39    file: Rc<PathBuf>,
40    line_num: &'b mut LineId,
41}
42
43#[derive(Debug)]
44pub(crate) struct Cursors {
45    i: usize,
46    j: usize,
47    oi: Option<usize>,
48    oj: Option<usize>,
49    pending: Pending,
50    last_alive_context: usize,
51    leading_equals: usize,
52    trailing_equals: usize,
53}
54
55impl<'a, 'b, T: Transaction + 'a> graph::LineBuffer<'a, T> for Diff<'a, 'b, T> {
56    fn output_line(&mut self, k: &Key<PatchId>, c: Value<'a, T>) -> Result<()> {
57        self.lines_a.push(k.clone());
58        self.contents_a.push(c);
59        Ok(())
60    }
61
62    fn begin_conflict(&mut self) -> Result<()> {
63        let len = self.lines_a.len();
64        self.current_conflict_ancestor.push(len);
65        self.status.insert(len, Status::Begin);
66
67        self.output_conflict_marker(conflict::START_MARKER)
68    }
69
70    fn end_conflict(&mut self) -> Result<()> {
71        let len = self.lines_a.len();
72        self.status.insert(len, Status::End);
73        self.conflicts_descendants
74            .insert(*self.current_conflict_ancestor.last().unwrap(), len);
75
76        self.output_conflict_marker(conflict::END_MARKER)?;
77
78        self.current_conflict_ancestor.pop();
79        Ok(())
80    }
81
82    fn conflict_next(&mut self) -> Result<()> {
83        self.status.insert(self.lines_a.len(), Status::Separator);
84        {
85            let e = self.conflicts_sides
86                .entry(*self.current_conflict_ancestor.last().unwrap())
87                .or_insert(Vec::new());
88            e.push(self.lines_a.len());
89        }
90        self.output_conflict_marker(conflict::SEPARATOR)
91    }
92
93    fn output_conflict_marker(&mut self, marker: &'a str) -> Result<()> {
94        let l = self.lines_a.len();
95        self.lines_a.push(ROOT_KEY.clone());
96        self.contents_a.push(Value::from_slice(marker.as_bytes()));
97        self.conflicts_ancestors
98            .insert(l, *self.current_conflict_ancestor.last().unwrap());
99        Ok(())
100    }
101}
102
103#[derive(Debug)]
104struct Deletion {
105    del: Option<Change<Rc<RefCell<ChangeContext<PatchId>>>>>,
106    conflict_ordering: Vec<Change<Rc<RefCell<ChangeContext<PatchId>>>>>,
107}
108
109#[derive(Debug)]
110enum Pending {
111    None,
112    Deletion(Deletion),
113    Addition(Change<Rc<RefCell<ChangeContext<PatchId>>>>),
114}
115
116impl Pending {
117    fn take(&mut self) -> Pending {
118        std::mem::replace(self, Pending::None)
119    }
120    fn is_none(&self) -> bool {
121        if let Pending::None = *self {
122            true
123        } else {
124            false
125        }
126    }
127}
128
129use std::ops::{Index, IndexMut};
130struct Matrix<T> {
131    rows: usize,
132    cols: usize,
133    v: Vec<T>,
134}
135impl<T: Clone> Matrix<T> {
136    fn new(rows: usize, cols: usize, t: T) -> Self {
137        Matrix {
138            rows: rows,
139            cols: cols,
140            v: vec![t; rows * cols],
141        }
142    }
143}
144impl<T> Index<usize> for Matrix<T> {
145    type Output = [T];
146    fn index(&self, i: usize) -> &[T] {
147        &self.v[i * self.cols..(i + 1) * self.cols]
148    }
149}
150impl<T> IndexMut<usize> for Matrix<T> {
151    fn index_mut(&mut self, i: usize) -> &mut [T] {
152        &mut self.v[i * self.cols..(i + 1) * self.cols]
153    }
154}
155
156fn amend_down_context<A: Transaction>(diff: &Diff<A>, i0: usize, i: usize) {
157    if let Some(&(ref line, ref down)) = diff.conflicts_down_contexts.get(&i0) {
158        // `line` is the LineId of the line inserted by
159        // this patch before i0 (remember that i0 is a
160        // conflict marker).
161        debug!("lines_eq, line = {:?}", line);
162        let key = diff.lines_a[i];
163        down.borrow_mut().push(Key {
164            patch: Some(key.patch),
165            line: key.line,
166        })
167    }
168}
169
170impl<A: Transaction, R: rand::Rng> GenericTxn<A, R> {
171    fn lines_eq(
172        &self,
173        branch: &Branch,
174        diff: &mut Diff<A>,
175        b: &[&[u8]],
176        cursors: &mut Cursors,
177        actions: &mut Vec<Record<Rc<RefCell<ChangeContext<PatchId>>>>>,
178    ) {
179        // Two lines are equal. If we were collecting lines to add or
180        // delete, we must stop here (in order to get the smallest
181        // possible patch).
182        debug!("eq: {:?} {:?}", cursors.i, cursors.j);
183
184        // Here we know that diff.lines_a[cursors.i] is equal to
185        // b[cursors.j].
186        //
187        // If the line just before this equality was a conflict marker
188        // (i.e. an index i such that diff.lines_a[i].is_root()), some
189        // other conflict markers might miss edges (because of how we
190        // record that situation in
191        // optimal_diff::add::add_lines_down_context). If this is the
192        // case, we need to add those missing edges.
193
194        if cursors.i > 0 && diff.lines_a[cursors.i - 1].is_root() {
195            // i0 is the conflict's ancestor (another "root" line).
196            let status = *diff.status.get(&(cursors.i - 1)).unwrap();
197            let mut i0 = *diff.conflicts_ancestors.get(&(cursors.i - 1)).unwrap();
198            if status < Status::End {
199                debug!("lines_eq, i0 = {:?}", i0);
200                // This is an equality immediately after a conflict
201                // marker. If there was a line added just before the
202                // beginning of the conflict, add an edge. Else, don't
203                // do anything.
204                amend_down_context(diff, i0, cursors.i)
205            } else {
206                // End of the conflict. There might have been
207                // additions just before a conflict marker (separator
208                // or end) of this conflict. We need to add all of
209                // them.
210                i0 += 1;
211                while i0 < cursors.i {
212                    amend_down_context(diff, i0, cursors.i);
213                    i0 += 1
214                }
215            }
216        }
217
218        //
219        if let Some(i0) = cursors.oi.take() {
220            // If we were collecting lines to delete (from i0, inclusive).
221            let i0 = cursors.leading_equals + i0;
222            let i = cursors.leading_equals + cursors.i;
223            debug!("deleting from {} to {} / {}", i0, i, diff.lines_a.len());
224            if i0 < i {
225                let dels = self.delete_lines(branch, diff, i0, i);
226                stop_del(diff.file.clone(), cursors, dels, actions)
227            }
228        } else if let Some(j0) = cursors.oj.take() {
229            // Else, if we were collecting lines to add (from j0, inclusive).
230            let j0 = cursors.leading_equals + j0;
231            let j = cursors.leading_equals + cursors.j;
232            let i = cursors.leading_equals + cursors.i;
233            debug!(
234                "adding from {} to {} / {}, context {}",
235                j0,
236                cursors.j,
237                b.len(),
238                cursors.last_alive_context
239            );
240            if j0 < j {
241                let adds = self.add_lines(
242                    cursors.last_alive_context,
243                    Some(i),
244                    diff,
245                    &b[j0..j],
246                    cursors.trailing_equals,
247                );
248                stop_add(diff.file.clone(), cursors, adds, actions)
249            }
250        }
251        // "Confirm" line i / j, if it is a zombie line.
252        self.confirm_zombie(
253            branch,
254            diff,
255            actions,
256            diff.lines_a[cursors.leading_equals + cursors.i],
257        );
258
259        // Move on to the next step.
260        cursors.last_alive_context = cursors.leading_equals + cursors.i;
261        cursors.i += 1;
262        cursors.j += 1;
263    }
264
265    fn move_i(&self, diff: &mut Diff<A>, b: &[&[u8]], cursors: &mut Cursors) {
266        // We will delete things starting from i (included).
267        // If we are currently adding stuff, finish that.
268        if let Some(j0) = cursors.oj.take() {
269            let j0 = cursors.leading_equals + j0;
270            let j = cursors.leading_equals + cursors.j;
271            let i = cursors.leading_equals + cursors.i;
272            debug!(
273                "adding from {} to {} / {}, context {}",
274                j0,
275                j,
276                b.len(),
277                cursors.last_alive_context
278            );
279
280            if j0 < j {
281                // Since we either always choose deletions
282                // or always additions, we can't have two
283                // consecutive replacements, hence there
284                // should be no pending change.
285                assert!(cursors.pending.is_none());
286                cursors.pending = Pending::Addition(self.add_lines(
287                    cursors.last_alive_context,
288                    Some(i),
289                    diff,
290                    &b[j0..j],
291                    cursors.trailing_equals,
292                ))
293            }
294        }
295        if cursors.oi.is_none() {
296            cursors.oi = Some(cursors.i)
297        }
298        cursors.i += 1
299    }
300
301    fn finish_i(
302        &self,
303        branch: &Branch,
304        diff: &mut Diff<A>,
305        b: &[&[u8]],
306        cursors: &mut Cursors,
307        actions: &mut Vec<Record<Rc<RefCell<ChangeContext<PatchId>>>>>,
308    ) {
309        let i = cursors.leading_equals + cursors.i;
310        let j = cursors.leading_equals + cursors.j;
311        if let Some(j0) = cursors.oj {
312            // Before stopping, we were adding lines.
313            let j0 = cursors.leading_equals + j0;
314            if j0 < j {
315                debug!(
316                    "line {}, adding remaining lines before the last deletion i={} j={} j0={}",
317                    line!(),
318                    i,
319                    j0,
320                    j
321                );
322                assert!(cursors.pending.is_none());
323                cursors.pending = Pending::Addition(self.add_lines(
324                    i - 1,
325                    Some(i),
326                    diff,
327                    &b[j0..j],
328                    cursors.trailing_equals,
329                ));
330            }
331        }
332
333        let i0 = diff.lines_a.len() - cursors.trailing_equals;
334        let dels = self.delete_lines(branch, diff, i, i0);
335        stop_del(diff.file.clone(), cursors, dels, actions);
336    }
337
338    fn move_j(&self, branch: &Branch, diff: &mut Diff<A>, cursors: &mut Cursors) {
339        // We will add things starting from j.
340        // Are we currently deleting stuff?
341        if let Some(i0) = cursors.oi.take() {
342            let i0 = cursors.leading_equals + i0;
343            let i = cursors.leading_equals + cursors.i;
344            if i0 < i {
345                assert!(cursors.pending.is_none());
346                cursors.pending = Pending::Deletion(self.delete_lines(branch, diff, i0, i))
347            }
348            cursors.last_alive_context = i0 - 1;
349        }
350        if cursors.oj.is_none() {
351            cursors.oj = Some(cursors.j)
352        }
353        cursors.j += 1
354    }
355
356    fn finish_j(
357        &self,
358        branch: &Branch,
359        diff: &mut Diff<A>,
360        b: &[&[u8]],
361        cursors: &mut Cursors,
362        actions: &mut Vec<Record<Rc<RefCell<ChangeContext<PatchId>>>>>,
363    ) {
364        // There's a pending block to add at the end of the file.
365        let j = cursors.leading_equals + cursors.j;
366        let mut i = cursors.leading_equals + cursors.i;
367        if let Some(i0) = cursors.oi {
368            // We were doing a deletion when we stopped.
369            let i0 = cursors.leading_equals + i0;
370            if i0 < i {
371                assert!(cursors.pending.is_none());
372                cursors.pending = Pending::Deletion(self.delete_lines(branch, diff, i0, i));
373            }
374            i = i0;
375            debug!(
376                "line {}, adding lines after trailing equals: {:?} {:?}",
377                line!(),
378                diff.lines_a.len(),
379                cursors.trailing_equals
380            );
381        }
382
383        let adds = self.add_lines(
384            i - 1,
385            if cursors.trailing_equals > 0 {
386                Some(diff.lines_a.len() - cursors.trailing_equals)
387            } else {
388                None
389            },
390            diff,
391            &b[j..b.len() - cursors.trailing_equals],
392            cursors.trailing_equals,
393        );
394
395        stop_add(diff.file.clone(), cursors, adds, actions)
396    }
397
398    fn local_diff<'a>(
399        &'a self,
400        branch: &Branch,
401        actions: &mut Vec<Record<Rc<RefCell<ChangeContext<PatchId>>>>>,
402        diff: &mut Diff<A>,
403        b: &[&'a [u8]],
404    ) {
405        debug!("local_diff {} {}", diff.contents_a.len(), b.len());
406
407        // Compute the costs.
408        let mut cursors = bracket_equals(diff, b);
409
410        debug!("equals: {:?}", cursors);
411
412        let mut opt = Matrix::new(
413            diff.contents_a.len() + 1 - cursors.leading_equals - cursors.trailing_equals,
414            b.len() + 1 - cursors.leading_equals - cursors.trailing_equals,
415            0,
416        );
417        debug!("opt.rows: {:?}, opt.cols: {:?}", opt.rows, opt.cols);
418        compute_costs(
419            diff,
420            b,
421            cursors.leading_equals,
422            cursors.trailing_equals,
423            &mut opt,
424        );
425
426        while cursors.i < opt.rows - 1 && cursors.j < opt.cols - 1 {
427            debug!("i={}, j={}", cursors.i, cursors.j);
428            let contents_a_i = diff.contents_a[cursors.leading_equals + cursors.i].clone();
429            let contents_b_j: Value<'a, A> =
430                Value::from_slice(b[cursors.leading_equals + cursors.j]);
431            trace!("c_a_i = {:?} c_a_j = {:?}", contents_a_i, contents_b_j);
432            if contents_a_i.eq(contents_b_j) {
433                self.lines_eq(branch, diff, b, &mut cursors, actions)
434            } else {
435                // Else, the current lines on each side are not equal:
436                debug!("not eq");
437                if opt[cursors.i + 1][cursors.j] >= opt[cursors.i][cursors.j + 1] {
438                    debug!("move i");
439                    self.move_i(diff, b, &mut cursors)
440                } else {
441                    debug!("move j");
442                    self.move_j(branch, diff, &mut cursors)
443                }
444            }
445        }
446        // Alright, we're at the end of either the original file, or the new version.
447        debug!("i = {:?}, j = {:?}", cursors.i, cursors.j);
448        // debug!("line_a {:?}, b {:?}", i, j, diff.lines_a, b);
449        if cursors.i < opt.rows - 1 {
450            // There are remaining deletions, i.e. things from the
451            // original file are not in the new version.
452            self.finish_i(branch, diff, b, &mut cursors, actions)
453        } else if cursors.j < opt.cols - 1 {
454            self.finish_j(branch, diff, b, &mut cursors, actions)
455        }
456    }
457
458    pub fn diff<'a>(
459        &'a self,
460        inode: Key<Option<Hash>>,
461        branch: &'a Branch,
462        file: Rc<PathBuf>,
463        line_num: &mut LineId,
464        actions: &mut Vec<Record<Rc<RefCell<ChangeContext<PatchId>>>>>,
465        redundant: &mut Vec<(Key<PatchId>, Edge)>,
466        a: &mut Graph,
467        lines_b: &[&[u8]],
468    ) -> Result<()> {
469        debug!("a = {:?}", a);
470        let mut d = Diff {
471            lines_a: Vec::new(),
472            contents_a: Vec::new(),
473            conflicts_ancestors: HashMap::new(),
474            current_conflict_ancestor: Vec::new(),
475            conflicts_descendants: HashMap::new(),
476            conflicts_down_contexts: HashMap::new(),
477            conflicts_sides: HashMap::new(),
478            status: HashMap::new(),
479            inode,
480            file,
481            line_num,
482        };
483        self.output_file(branch, &mut d, a, redundant)?;
484        debug!("d = {:?}, {:?}", d.lines_a, d.contents_a);
485        self.local_diff(branch, actions, &mut d, &lines_b);
486        Ok(())
487    }
488}
489
490// buf_b should initially contain the whole file.
491pub fn read_lines(buf_b: &[u8]) -> Vec<&[u8]> {
492    let mut lines_b = Vec::new();
493    let mut i = 0;
494    let mut j = 0;
495
496    while j < buf_b.len() {
497        if buf_b[j] == 0xa {
498            lines_b.push(&buf_b[i..j + 1]);
499            i = j + 1
500        }
501        j += 1;
502    }
503    if i < j {
504        lines_b.push(&buf_b[i..j])
505    }
506    lines_b
507}
508
509fn bracket_equals<A: Transaction>(diff: &Diff<A>, b: &[&[u8]]) -> Cursors {
510    // Start by computing the leading and trailing equalities.
511    let leading_equals = diff.contents_a.iter()
512        .skip(1)
513        .zip(b.iter())
514        .take_while(|&(a, b)| {
515            trace!("leading_equals: {:?} = {:?} ?", a, b);
516            let b: Value<A> = Value::from_slice(b);
517            a.clone().eq(b)
518        })
519        .count();
520
521    let trailing_equals =
522        if leading_equals >= min(diff.contents_a.len(), b.len()) {
523            0
524        } else {
525            (&diff.contents_a[leading_equals..]).iter().rev()
526                .zip((&b[leading_equals..]).iter().rev())
527                .take_while(|&(a, b)| {
528                    trace!("trailing_equals: {:?} = {:?} ?", a, b);
529                    let b: Value<A> = Value::from_slice(b);
530                    a.clone().eq(b)
531                })
532                .count()
533        };
534
535    // Create the patches.
536    Cursors {
537        i: 1,
538        j: 0,
539        oi: None,
540        oj: None,
541        last_alive_context: leading_equals,
542        pending: Pending::None,
543        leading_equals,
544        trailing_equals,
545    }
546}
547
548fn compute_costs<A: Transaction>(
549    diff: &Diff<A>,
550    b: &[&[u8]],
551    leading_equals: usize,
552    trailing_equals: usize,
553    opt: &mut Matrix<usize>,
554) {
555    if diff.contents_a.len() - trailing_equals - leading_equals > 0 {
556        let mut i = diff.contents_a.len() - 1 - trailing_equals - leading_equals;
557        loop {
558            if b.len() - trailing_equals - leading_equals > 0 {
559                let mut j = b.len() - 1 - trailing_equals - leading_equals;
560                loop {
561                    let contents_a_i = diff.contents_a[leading_equals + i].clone();
562                    let contents_b_j: Value<A> = Value::from_slice(&b[leading_equals + j]);
563                    opt[i][j] = if contents_a_i.eq(contents_b_j) {
564                        opt[i + 1][j + 1] + 1
565                    } else {
566                        std::cmp::max(opt[i + 1][j], opt[i][j + 1])
567                    };
568                    if j > 0 {
569                        j -= 1
570                    } else {
571                        break;
572                    }
573                }
574            }
575            if i > 0 {
576                i -= 1
577            } else {
578                break;
579            }
580        }
581    }
582}
583
584fn stop_del(
585    file: Rc<PathBuf>,
586    cursors: &mut Cursors,
587    dels: Deletion,
588    actions: &mut Vec<Record<Rc<RefCell<ChangeContext<PatchId>>>>>,
589) {
590    if let Pending::Addition(pending) = cursors.pending.take() {
591        if let Some(del) = dels.del {
592            actions.push(Record::Replace {
593                dels: del,
594                adds: pending,
595                file: file.clone(),
596                conflict_reordering: dels.conflict_ordering,
597            })
598        } else {
599            actions.push(Record::Change {
600                change: pending,
601                file: file.clone(),
602                conflict_reordering: dels.conflict_ordering,
603            })
604        }
605    } else if let Some(del) = dels.del {
606        actions.push(Record::Change {
607            change: del,
608            file: file.clone(),
609            conflict_reordering: dels.conflict_ordering,
610        })
611    } else {
612        for reord in dels.conflict_ordering {
613            actions.push(Record::Change {
614                change: reord,
615                file: file.clone(),
616                conflict_reordering: Vec::new(),
617            })
618        }
619    }
620}
621
622fn stop_add(
623    file: Rc<PathBuf>,
624    cursors: &mut Cursors,
625    adds: Change<Rc<RefCell<ChangeContext<PatchId>>>>,
626    actions: &mut Vec<Record<Rc<RefCell<ChangeContext<PatchId>>>>>,
627) {
628    if let Pending::Deletion(pending) = cursors.pending.take() {
629        if let Some(del) = pending.del {
630            actions.push(Record::Replace {
631                dels: del,
632                adds: adds,
633                file: file.clone(),
634                conflict_reordering: pending.conflict_ordering,
635            });
636        } else {
637            actions.push(Record::Change {
638                change: adds,
639                file: file.clone(),
640                conflict_reordering: pending.conflict_ordering,
641            })
642        }
643    } else {
644        actions.push(Record::Change {
645            change: adds,
646            file: file.clone(),
647            conflict_reordering: Vec::new(),
648        })
649    }
650}