Skip to main content

gix_diff/tree/
function.rs

1use std::{borrow::BorrowMut, collections::VecDeque};
2
3use gix_object::{tree::EntryRef, FindExt, TreeRefIter};
4
5use crate::tree::{
6    visit::{Change, ChangeId, Relation},
7    Error, State, TreeInfoTuple, Visit,
8};
9
10/// Calculate the changes that would need to be applied to `lhs` to get `rhs` using `objects` to obtain objects as needed for traversal.
11/// `state` can be used between multiple calls to re-use memory.
12///
13/// * The `state` maybe owned or mutably borrowed to allow reuses allocated data structures through multiple runs.
14/// * `delegate` will receive the computed changes, see the [`Visit`] trait for more information on what to expect.
15///
16/// # Notes
17///
18/// * `lhs` can be an empty tree to simulate what would happen if the left-hand side didn't exist.
19/// * To obtain progress, implement it within the `delegate`.
20/// * Tree entries are expected to be ordered using [`tree-entry-comparison`][git_cmp_c] (the same [in Rust][git_cmp_rs])
21/// * it does a breadth first iteration as buffer space only fits two trees, the current one on the one we compare with.
22/// * does not do rename tracking but attempts to reduce allocations to zero (so performance is mostly determined
23///   by the delegate implementation which should be as specific as possible. Rename tracking can be computed on top of the changes
24///   received by the `delegate`.
25/// * cycle checking is not performed, but can be performed in the delegate which can return
26///   [`std::ops::ControlFlow::Break`] to stop the traversal.
27///
28/// [git_cmp_c]: https://github.com/git/git/blob/ef8ce8f3d4344fd3af049c17eeba5cd20d98b69f/tree-diff.c#L72-L88
29/// [git_cmp_rs]: https://github.com/GitoxideLabs/gitoxide/blob/795962b107d86f58b1f7c75006da256d19cc80ad/gix-object/src/tree/mod.rs#L263-L273
30#[doc(alias = "diff_tree_to_tree", alias = "git2")]
31pub fn diff<StateMut>(
32    lhs: TreeRefIter<'_>,
33    rhs: TreeRefIter<'_>,
34    mut state: StateMut,
35    objects: impl gix_object::Find,
36    delegate: &mut impl Visit,
37) -> Result<(), Error>
38where
39    StateMut: BorrowMut<State>,
40{
41    let state = state.borrow_mut();
42    state.clear();
43    let mut lhs_entries = peekable(lhs);
44    let mut rhs_entries = peekable(rhs);
45    let mut relation = None;
46    let mut pop_path = false;
47
48    loop {
49        if pop_path {
50            delegate.pop_path_component();
51        }
52        pop_path = true;
53
54        match (lhs_entries.next(), rhs_entries.next()) {
55            (None, None) => {
56                match state.trees.pop_front() {
57                    Some((None, Some(rhs), relation_to_propagate)) => {
58                        delegate.pop_front_tracked_path_and_set_current();
59                        relation = relation_to_propagate;
60                        rhs_entries = peekable(objects.find_tree_iter(&rhs, &mut state.buf2)?);
61                    }
62                    Some((Some(lhs), Some(rhs), relation_to_propagate)) => {
63                        delegate.pop_front_tracked_path_and_set_current();
64                        lhs_entries = peekable(objects.find_tree_iter(&lhs, &mut state.buf1)?);
65                        rhs_entries = peekable(objects.find_tree_iter(&rhs, &mut state.buf2)?);
66                        relation = relation_to_propagate;
67                    }
68                    Some((Some(lhs), None, relation_to_propagate)) => {
69                        delegate.pop_front_tracked_path_and_set_current();
70                        lhs_entries = peekable(objects.find_tree_iter(&lhs, &mut state.buf1)?);
71                        relation = relation_to_propagate;
72                    }
73                    Some((None, None, _)) => unreachable!("BUG: it makes no sense to fill the stack with empties"),
74                    None => return Ok(()),
75                }
76                pop_path = false;
77            }
78            (Some(lhs), Some(rhs)) => {
79                use std::cmp::Ordering::*;
80                let (lhs, rhs) = (lhs?, rhs?);
81                match compare(&lhs, &rhs) {
82                    Equal => handle_lhs_and_rhs_with_equal_filenames(
83                        lhs,
84                        rhs,
85                        &mut state.trees,
86                        &mut state.change_id,
87                        relation,
88                        delegate,
89                    )?,
90                    Less => catchup_lhs_with_rhs(
91                        &mut lhs_entries,
92                        lhs,
93                        rhs,
94                        &mut state.trees,
95                        &mut state.change_id,
96                        relation,
97                        delegate,
98                    )?,
99                    Greater => catchup_rhs_with_lhs(
100                        &mut rhs_entries,
101                        lhs,
102                        rhs,
103                        &mut state.trees,
104                        &mut state.change_id,
105                        relation,
106                        delegate,
107                    )?,
108                }
109            }
110            (Some(lhs), None) => {
111                let lhs = lhs?;
112                delete_entry_schedule_recursion(lhs, &mut state.trees, &mut state.change_id, relation, delegate)?;
113            }
114            (None, Some(rhs)) => {
115                let rhs = rhs?;
116                add_entry_schedule_recursion(rhs, &mut state.trees, &mut state.change_id, relation, delegate)?;
117            }
118        }
119    }
120}
121
122fn compare(a: &EntryRef<'_>, b: &EntryRef<'_>) -> std::cmp::Ordering {
123    let common = a.filename.len().min(b.filename.len());
124    a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
125        let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
126        let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
127        a.cmp(&b)
128    })
129}
130
131fn delete_entry_schedule_recursion(
132    entry: EntryRef<'_>,
133    queue: &mut VecDeque<TreeInfoTuple>,
134    change_id: &mut ChangeId,
135    relation_to_propagate: Option<Relation>,
136    delegate: &mut impl Visit,
137) -> Result<(), Error> {
138    delegate.push_path_component(entry.filename);
139    let relation = relation_to_propagate.or_else(|| {
140        entry.mode.is_tree().then(|| {
141            *change_id += 1;
142            Relation::Parent(*change_id)
143        })
144    });
145    let is_cancelled = delegate
146        .visit(Change::Deletion {
147            entry_mode: entry.mode,
148            oid: entry.oid.to_owned(),
149            relation,
150        })
151        .is_break();
152    if is_cancelled {
153        return Err(Error::Cancelled);
154    }
155    if entry.mode.is_tree() {
156        delegate.pop_path_component();
157        delegate.push_back_tracked_path_component(entry.filename);
158        queue.push_back((Some(entry.oid.to_owned()), None, to_child(relation)));
159    }
160    Ok(())
161}
162
163fn add_entry_schedule_recursion(
164    entry: EntryRef<'_>,
165    queue: &mut VecDeque<TreeInfoTuple>,
166    change_id: &mut ChangeId,
167    relation_to_propagate: Option<Relation>,
168    delegate: &mut impl Visit,
169) -> Result<(), Error> {
170    delegate.push_path_component(entry.filename);
171    let relation = relation_to_propagate.or_else(|| {
172        entry.mode.is_tree().then(|| {
173            *change_id += 1;
174            Relation::Parent(*change_id)
175        })
176    });
177    if delegate
178        .visit(Change::Addition {
179            entry_mode: entry.mode,
180            oid: entry.oid.to_owned(),
181            relation,
182        })
183        .is_break()
184    {
185        return Err(Error::Cancelled);
186    }
187    if entry.mode.is_tree() {
188        delegate.pop_path_component();
189        delegate.push_back_tracked_path_component(entry.filename);
190        queue.push_back((None, Some(entry.oid.to_owned()), to_child(relation)));
191    }
192    Ok(())
193}
194
195fn catchup_rhs_with_lhs(
196    rhs_entries: &mut IteratorType<TreeRefIter<'_>>,
197    lhs: EntryRef<'_>,
198    rhs: EntryRef<'_>,
199    queue: &mut VecDeque<TreeInfoTuple>,
200    change_id: &mut ChangeId,
201    relation_to_propagate: Option<Relation>,
202    delegate: &mut impl Visit,
203) -> Result<(), Error> {
204    use std::cmp::Ordering::*;
205    add_entry_schedule_recursion(rhs, queue, change_id, relation_to_propagate, delegate)?;
206    loop {
207        match rhs_entries.peek() {
208            Some(Ok(rhs)) => match compare(&lhs, rhs) {
209                Equal => {
210                    let rhs = rhs_entries.next().transpose()?.expect("the peeked item to be present");
211                    delegate.pop_path_component();
212                    handle_lhs_and_rhs_with_equal_filenames(
213                        lhs,
214                        rhs,
215                        queue,
216                        change_id,
217                        relation_to_propagate,
218                        delegate,
219                    )?;
220                    break;
221                }
222                Greater => {
223                    let rhs = rhs_entries.next().transpose()?.expect("the peeked item to be present");
224                    delegate.pop_path_component();
225                    add_entry_schedule_recursion(rhs, queue, change_id, relation_to_propagate, delegate)?;
226                }
227                Less => {
228                    delegate.pop_path_component();
229                    delete_entry_schedule_recursion(lhs, queue, change_id, relation_to_propagate, delegate)?;
230                    break;
231                }
232            },
233            Some(Err(err)) => return Err(Error::EntriesDecode(err.to_owned())),
234            None => {
235                delegate.pop_path_component();
236                delete_entry_schedule_recursion(lhs, queue, change_id, relation_to_propagate, delegate)?;
237                break;
238            }
239        }
240    }
241    Ok(())
242}
243
244fn catchup_lhs_with_rhs(
245    lhs_entries: &mut IteratorType<TreeRefIter<'_>>,
246    lhs: EntryRef<'_>,
247    rhs: EntryRef<'_>,
248    queue: &mut VecDeque<TreeInfoTuple>,
249    change_id: &mut ChangeId,
250    relation_to_propagate: Option<Relation>,
251    delegate: &mut impl Visit,
252) -> Result<(), Error> {
253    use std::cmp::Ordering::*;
254    delete_entry_schedule_recursion(lhs, queue, change_id, relation_to_propagate, delegate)?;
255    loop {
256        match lhs_entries.peek() {
257            Some(Ok(lhs)) => match compare(lhs, &rhs) {
258                Equal => {
259                    let lhs = lhs_entries.next().expect("the peeked item to be present")?;
260                    delegate.pop_path_component();
261                    handle_lhs_and_rhs_with_equal_filenames(
262                        lhs,
263                        rhs,
264                        queue,
265                        change_id,
266                        relation_to_propagate,
267                        delegate,
268                    )?;
269                    break;
270                }
271                Less => {
272                    let lhs = lhs_entries.next().expect("the peeked item to be present")?;
273                    delegate.pop_path_component();
274                    delete_entry_schedule_recursion(lhs, queue, change_id, relation_to_propagate, delegate)?;
275                }
276                Greater => {
277                    delegate.pop_path_component();
278                    add_entry_schedule_recursion(rhs, queue, change_id, relation_to_propagate, delegate)?;
279                    break;
280                }
281            },
282            Some(Err(err)) => return Err(Error::EntriesDecode(err.to_owned())),
283            None => {
284                delegate.pop_path_component();
285                add_entry_schedule_recursion(rhs, queue, change_id, relation_to_propagate, delegate)?;
286                break;
287            }
288        }
289    }
290    Ok(())
291}
292
293fn handle_lhs_and_rhs_with_equal_filenames(
294    lhs: EntryRef<'_>,
295    rhs: EntryRef<'_>,
296    queue: &mut VecDeque<TreeInfoTuple>,
297    change_id: &mut ChangeId,
298    relation_to_propagate: Option<Relation>,
299    delegate: &mut impl Visit,
300) -> Result<(), Error> {
301    match (lhs.mode.is_tree(), rhs.mode.is_tree()) {
302        (true, true) => {
303            if lhs.oid == rhs.oid {
304                // If the tree oids are identical, we won't bother recursing
305                // into this subtree as the entire tree is identical.
306                // For path management purposes, treat it like a skipped blob.
307                delegate.push_path_component(lhs.filename);
308            } else {
309                delegate.push_back_tracked_path_component(lhs.filename);
310                if delegate
311                    .visit(Change::Modification {
312                        previous_entry_mode: lhs.mode,
313                        previous_oid: lhs.oid.to_owned(),
314                        entry_mode: rhs.mode,
315                        oid: rhs.oid.to_owned(),
316                    })
317                    .is_break()
318                {
319                    return Err(Error::Cancelled);
320                }
321                queue.push_back((
322                    Some(lhs.oid.to_owned()),
323                    Some(rhs.oid.to_owned()),
324                    relation_to_propagate,
325                ));
326            }
327        }
328        (_, true) => {
329            delegate.push_back_tracked_path_component(lhs.filename);
330            if delegate
331                .visit(Change::Deletion {
332                    entry_mode: lhs.mode,
333                    oid: lhs.oid.to_owned(),
334                    relation: None,
335                })
336                .is_break()
337            {
338                return Err(Error::Cancelled);
339            }
340
341            let relation = relation_to_propagate.or_else(|| {
342                *change_id += 1;
343                Some(Relation::Parent(*change_id))
344            });
345            if delegate
346                .visit(Change::Addition {
347                    entry_mode: rhs.mode,
348                    oid: rhs.oid.to_owned(),
349                    relation,
350                })
351                .is_break()
352            {
353                return Err(Error::Cancelled);
354            }
355            queue.push_back((None, Some(rhs.oid.to_owned()), to_child(relation)));
356        }
357        (true, _) => {
358            delegate.push_back_tracked_path_component(lhs.filename);
359            let relation = relation_to_propagate.or_else(|| {
360                *change_id += 1;
361                Some(Relation::Parent(*change_id))
362            });
363            if delegate
364                .visit(Change::Deletion {
365                    entry_mode: lhs.mode,
366                    oid: lhs.oid.to_owned(),
367                    relation,
368                })
369                .is_break()
370            {
371                return Err(Error::Cancelled);
372            }
373            if delegate
374                .visit(Change::Addition {
375                    entry_mode: rhs.mode,
376                    oid: rhs.oid.to_owned(),
377                    relation: None,
378                })
379                .is_break()
380            {
381                return Err(Error::Cancelled);
382            }
383            queue.push_back((Some(lhs.oid.to_owned()), None, to_child(relation)));
384        }
385        (false, false) => {
386            delegate.push_path_component(lhs.filename);
387            debug_assert!(lhs.mode.is_no_tree() && lhs.mode.is_no_tree());
388            if (lhs.oid != rhs.oid || lhs.mode != rhs.mode)
389                && delegate
390                    .visit(Change::Modification {
391                        previous_entry_mode: lhs.mode,
392                        previous_oid: lhs.oid.to_owned(),
393                        entry_mode: rhs.mode,
394                        oid: rhs.oid.to_owned(),
395                    })
396                    .is_break()
397            {
398                return Err(Error::Cancelled);
399            }
400        }
401    }
402    Ok(())
403}
404
405type IteratorType<I> = std::iter::Peekable<I>;
406
407fn to_child(r: Option<Relation>) -> Option<Relation> {
408    r.map(|r| match r {
409        Relation::Parent(id) => Relation::ChildOfParent(id),
410        Relation::ChildOfParent(id) => Relation::ChildOfParent(id),
411    })
412}
413
414fn peekable<I: Iterator>(iter: I) -> IteratorType<I> {
415    iter.peekable()
416}
417
418#[cfg(test)]
419mod tests {
420    use std::cmp::Ordering;
421
422    use gix_object::tree::EntryKind;
423
424    use super::*;
425
426    #[test]
427    fn compare_select_samples() {
428        let null = gix_hash::ObjectId::null(gix_hash::Kind::Sha1);
429        let actual = compare(
430            &EntryRef {
431                mode: EntryKind::Blob.into(),
432                filename: "plumbing-cli.rs".into(),
433                oid: &null,
434            },
435            &EntryRef {
436                mode: EntryKind::Tree.into(),
437                filename: "plumbing".into(),
438                oid: &null,
439            },
440        );
441        assert_eq!(actual, Ordering::Less);
442        let actual = compare(
443            &EntryRef {
444                mode: EntryKind::Tree.into(),
445                filename: "plumbing-cli.rs".into(),
446                oid: &null,
447            },
448            &EntryRef {
449                mode: EntryKind::Blob.into(),
450                filename: "plumbing".into(),
451                oid: &null,
452            },
453        );
454        assert_eq!(actual, Ordering::Greater);
455    }
456}