Skip to main content

gix_object/tree/
editor.rs

1use std::{
2    cmp::Ordering,
3    collections::{HashMap, hash_map},
4    fmt::Formatter,
5};
6
7use bstr::{BStr, BString, ByteSlice, ByteVec};
8use gix_hash::ObjectId;
9
10use crate::{
11    Tree, tree,
12    tree::{Editor, EntryKind},
13};
14
15/// A way to constrain all [tree-edits](Editor) to a given subtree.
16pub struct Cursor<'a, 'find> {
17    /// The underlying editor
18    parent: &'a mut Editor<'find>,
19    /// Our own location, used as prefix for all operations.
20    /// Note that it's assumed to always contain a tree.
21    prefix: BString,
22}
23
24impl std::fmt::Debug for Editor<'_> {
25    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
26        f.debug_struct("Editor")
27            .field("object_hash", &self.object_hash)
28            .field("path_buf", &self.path_buf)
29            .field("trees", &self.trees)
30            .finish()
31    }
32}
33
34/// The error returned by [Editor] or [Cursor] edit operation.
35#[derive(Debug, thiserror::Error)]
36#[allow(missing_docs)]
37pub enum Error {
38    #[error("Empty path components are not allowed")]
39    EmptyPathComponent,
40    #[error(transparent)]
41    FindExistingObject(#[from] crate::find::existing_object::Error),
42    #[error("Cannot remove '{rela_path}' as leaf entry because it is a tree")]
43    CannotRemoveNonLeaf { rela_path: BString },
44}
45
46/// Lifecycle
47impl<'a> Editor<'a> {
48    /// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing
49    /// trees when edits are made. Each tree will only be looked-up once and then edited in place from
50    /// that point on.
51    /// `object_hash` denotes the kind of hash to create.
52    pub fn new(root: Tree, find: &'a dyn crate::FindExt, object_hash: gix_hash::Kind) -> Self {
53        Editor {
54            find,
55            object_hash,
56            trees: HashMap::from_iter(Some((empty_path(), root))),
57            path_buf: BString::from(Vec::with_capacity(256)).into(),
58            tree_buf: Vec::with_capacity(512),
59        }
60    }
61}
62
63/// Operations
64impl Editor<'_> {
65    /// Write the entire in-memory state of all changed trees (and only changed trees) to `out`, and remove
66    /// written portions from our state except for the root tree, which affects [`get()`](Editor::get()).
67    /// Note that the returned object id *can* be the empty tree if everything was removed or if nothing
68    /// was added to the tree.
69    ///
70    /// The last call to `out` will be the changed root tree, whose object-id will also be returned.
71    /// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist.
72    /// We don't assure that as there is no validation that inserted entries are valid object ids.
73    ///
74    /// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the
75    /// just-written root-tree.
76    /// If this is not desired, use [set_root()](Self::set_root()).
77    ///
78    /// ### Validation
79    ///
80    /// Note that no additional validation is performed to assure correctness of entry-names.
81    /// It is absolutely and intentionally possible to write out invalid trees with this method.
82    /// Higher layers are expected to perform detailed validation.
83    pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
84        self.path_buf.borrow_mut().clear();
85        self.write_at_pathbuf(out, WriteMode::Normal)
86    }
87
88    /// Remove the entry at `rela_path`, loading all trees on the path accordingly.
89    /// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all.
90    pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, Error>
91    where
92        I: IntoIterator<Item = C>,
93        C: AsRef<BStr>,
94    {
95        self.path_buf.borrow_mut().clear();
96        self.upsert_or_remove_at_pathbuf(rela_path, EditMode::Remove(RemoveMode::Any))
97    }
98
99    /// Remove a non-tree entry at `rela_path`, loading all trees on the path accordingly.
100    /// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all.
101    ///
102    /// Return an error if the entry exists and is a tree, as that would otherwise also remove all entries below it.
103    /// Empty path components are rejected if reached while loading the path. This is useful to not unintentionally
104    /// remove a directory.
105    pub fn remove_leaf<I, C>(&mut self, rela_path: I) -> Result<&mut Self, Error>
106    where
107        I: IntoIterator<Item = C>,
108        C: AsRef<BStr>,
109    {
110        self.path_buf.borrow_mut().clear();
111        self.upsert_or_remove_at_pathbuf(rela_path, EditMode::Remove(RemoveMode::LeafOnly))
112    }
113
114    /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written
115    /// to that point.
116    /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed.
117    /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly
118    /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory.
119    pub fn get<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
120    where
121        I: IntoIterator<Item = C>,
122        C: AsRef<BStr>,
123    {
124        self.path_buf.borrow_mut().clear();
125        self.get_inner(rela_path)
126    }
127
128    /// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree,
129    /// like `a/b/c`. Names are matched case-sensitively.
130    ///
131    /// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database
132    /// or will be made available at a later point to assure the integrity of the produced tree.
133    ///
134    /// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries
135    /// will be inserted into them instead.
136    ///
137    /// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading
138    /// through them will not be considered a problem.
139    ///
140    /// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed
141    /// in Git trees.
142    pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, Error>
143    where
144        I: IntoIterator<Item = C>,
145        C: AsRef<BStr>,
146    {
147        self.path_buf.borrow_mut().clear();
148        self.upsert_or_remove_at_pathbuf(rela_path, EditMode::Upsert(kind, id, UpsertMode::Normal))
149    }
150
151    fn get_inner<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
152    where
153        I: IntoIterator<Item = C>,
154        C: AsRef<BStr>,
155    {
156        let mut path_buf = self.path_buf.borrow_mut();
157        let mut cursor = self.trees.get(path_buf.as_bstr()).expect("root is always present");
158        let mut rela_path = rela_path.into_iter().peekable();
159        while let Some(name) = rela_path.next() {
160            let name = name.as_ref();
161            let is_last = rela_path.peek().is_none();
162            match cursor
163                .entries
164                .binary_search_by(|e| cmp_entry_with_name(e, name, true))
165                .or_else(|_| cursor.entries.binary_search_by(|e| cmp_entry_with_name(e, name, false)))
166            {
167                Ok(idx) if is_last => return Some(&cursor.entries[idx]),
168                Ok(idx) => {
169                    if cursor.entries[idx].mode.is_tree() {
170                        push_path_component(&mut path_buf, name);
171                        cursor = self.trees.get(path_buf.as_bstr())?;
172                    } else {
173                        break;
174                    }
175                }
176                Err(_) => break,
177            }
178        }
179        None
180    }
181
182    fn write_at_pathbuf<E>(
183        &mut self,
184        mut out: impl FnMut(&Tree) -> Result<ObjectId, E>,
185        mode: WriteMode,
186    ) -> Result<ObjectId, E> {
187        assert_ne!(self.trees.len(), 0, "there is at least the root tree");
188
189        // back is for children, front is for parents.
190        let path_buf = self.path_buf.borrow_mut();
191        let mut parents = vec![(
192            None::<usize>,
193            path_buf.clone(),
194            self.trees
195                .remove(path_buf.as_bstr())
196                .expect("root tree is always present"),
197        )];
198        let mut children = Vec::new();
199        while let Some((parent_idx, mut rela_path, mut tree)) = children.pop().or_else(|| parents.pop()) {
200            let mut all_entries_unchanged_or_written = true;
201            for entry in &tree.entries {
202                if entry.mode.is_tree() {
203                    let prev_len = push_path_component(&mut rela_path, &entry.filename);
204                    if let Some(sub_tree) = self.trees.remove(&rela_path) {
205                        all_entries_unchanged_or_written = false;
206                        let next_parent_idx = parents.len();
207                        children.push((Some(next_parent_idx), rela_path.clone(), sub_tree));
208                    }
209                    rela_path.truncate(prev_len);
210                }
211            }
212            if all_entries_unchanged_or_written {
213                tree.entries.retain(|e| !e.oid.is_null());
214                if let Some((_, _, parent_to_adjust)) =
215                    parent_idx.map(|idx| parents.get_mut(idx).expect("always present, pointing towards zero"))
216                {
217                    let name = filename(rela_path.as_bstr());
218                    let entry_idx = parent_to_adjust
219                        .entries
220                        .binary_search_by(|e| cmp_entry_with_name(e, name, true))
221                        .expect("the parent always knows us by name");
222                    if tree.entries.is_empty() {
223                        parent_to_adjust.entries.remove(entry_idx);
224                    } else {
225                        match out(&tree) {
226                            Ok(id) => {
227                                parent_to_adjust.entries[entry_idx].oid = id;
228                            }
229                            Err(err) => {
230                                let root_tree = parents.into_iter().next().expect("root wasn't consumed yet");
231                                self.trees.insert(root_tree.1, root_tree.2);
232                                return Err(err);
233                            }
234                        }
235                    }
236                } else if parents.is_empty() {
237                    debug_assert!(children.is_empty(), "we consume children before parents");
238                    debug_assert_eq!(rela_path, **path_buf, "this should always be the root tree");
239
240                    // There may be left-over trees if they are replaced with blobs for example.
241                    match out(&tree) {
242                        Ok(id) => {
243                            let root_tree_id = id;
244                            match mode {
245                                WriteMode::Normal => {
246                                    self.trees.clear();
247                                }
248                                WriteMode::FromCursor => {}
249                            }
250                            self.trees.insert(rela_path, tree);
251                            return Ok(root_tree_id);
252                        }
253                        Err(err) => {
254                            self.trees.insert(rela_path, tree);
255                            return Err(err);
256                        }
257                    }
258                } else if !tree.entries.is_empty() {
259                    out(&tree)?;
260                }
261            } else {
262                parents.push((parent_idx, rela_path, tree));
263            }
264        }
265
266        unreachable!("we exit as soon as everything is consumed")
267    }
268
269    fn upsert_or_remove_at_pathbuf<I, C>(&mut self, rela_path: I, edit: EditMode) -> Result<&mut Self, Error>
270    where
271        I: IntoIterator<Item = C>,
272        C: AsRef<BStr>,
273    {
274        let mut path_buf = self.path_buf.borrow_mut();
275        let mut cursor = self.trees.get_mut(path_buf.as_bstr()).expect("root is always present");
276        let mut rela_path = rela_path.into_iter().peekable();
277        let new_kind_is_tree = matches!(edit, EditMode::Upsert(EntryKind::Tree, _, _));
278        while let Some(name) = rela_path.next() {
279            let name = name.as_ref();
280            if name.is_empty() {
281                return Err(Error::EmptyPathComponent);
282            }
283            let is_last = rela_path.peek().is_none();
284            let mut needs_sorting = false;
285            let current_level_must_be_tree = !is_last || new_kind_is_tree;
286            let check_type_change = |entry: &tree::Entry| entry.mode.is_tree() != current_level_must_be_tree;
287            let tree_to_lookup = match cursor
288                .entries
289                .binary_search_by(|e| cmp_entry_with_name(e, name, false))
290                .or_else(|file_insertion_idx| {
291                    cursor
292                        .entries
293                        .binary_search_by(|e| cmp_entry_with_name(e, name, true))
294                        .map_err(|dir_insertion_index| {
295                            if current_level_must_be_tree {
296                                dir_insertion_index
297                            } else {
298                                file_insertion_idx
299                            }
300                        })
301                }) {
302                Ok(idx) => {
303                    match edit {
304                        EditMode::Remove(mode) => {
305                            if is_last {
306                                if mode == RemoveMode::LeafOnly && cursor.entries[idx].mode.is_tree() {
307                                    return Err(Error::CannotRemoveNonLeaf {
308                                        rela_path: path_with_component(path_buf.as_bstr(), name),
309                                    });
310                                }
311                                cursor.entries.remove(idx);
312                                break;
313                            } else {
314                                let entry = &cursor.entries[idx];
315                                if entry.mode.is_tree() {
316                                    Some(entry.oid)
317                                } else {
318                                    break;
319                                }
320                            }
321                        }
322                        EditMode::Upsert(kind, id, _mode) => {
323                            let entry = &mut cursor.entries[idx];
324                            if is_last {
325                                // unconditionally overwrite what's there.
326                                entry.oid = id;
327                                needs_sorting = check_type_change(entry);
328                                entry.mode = kind.into();
329                                None
330                            } else if entry.mode.is_tree() {
331                                // Possibly lookup the existing tree on our way down the path.
332                                Some(entry.oid)
333                            } else {
334                                // it is no tree, but we are traversing a path, so turn it into one.
335                                entry.oid = id.kind().null();
336                                needs_sorting = check_type_change(entry);
337                                entry.mode = EntryKind::Tree.into();
338                                None
339                            }
340                        }
341                    }
342                }
343                Err(insertion_idx) => match edit {
344                    EditMode::Remove(_) => break,
345                    EditMode::Upsert(kind, id, _mode) => {
346                        cursor.entries.insert(
347                            insertion_idx,
348                            tree::Entry {
349                                filename: name.into(),
350                                mode: if is_last { kind.into() } else { EntryKind::Tree.into() },
351                                oid: if is_last { id } else { id.kind().null() },
352                            },
353                        );
354                        None
355                    }
356                },
357            };
358            if needs_sorting {
359                cursor.entries.sort();
360            }
361            if is_last && matches!(edit, EditMode::Upsert(_, _, UpsertMode::Normal)) {
362                break;
363            }
364            let stop_at_unedited_empty_tree = matches!(edit, EditMode::Remove(RemoveMode::LeafOnly))
365                && tree_to_lookup.is_some_and(|id| id.is_empty_tree());
366            push_path_component(&mut path_buf, name);
367            cursor = match self.trees.entry(path_buf.clone()) {
368                hash_map::Entry::Occupied(e) => e.into_mut(),
369                hash_map::Entry::Vacant(_) if stop_at_unedited_empty_tree => break,
370                hash_map::Entry::Vacant(e) => e.insert(
371                    if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) {
372                        self.find.find_tree(&tree_id, &mut self.tree_buf)?.into()
373                    } else {
374                        Tree::default()
375                    },
376                ),
377            };
378        }
379        drop(path_buf);
380        Ok(self)
381    }
382
383    /// Set the root tree of the modification to `root`, assuring it has a well-known state.
384    ///
385    /// Note that this erases all previous edits.
386    ///
387    /// This is useful if the same editor is re-used for various trees.
388    pub fn set_root(&mut self, root: Tree) -> &mut Self {
389        self.trees.clear();
390        self.trees.insert(empty_path(), root);
391        self
392    }
393}
394
395mod cursor {
396    use bstr::{BStr, BString};
397    use gix_hash::ObjectId;
398
399    use crate::{
400        Tree, tree,
401        tree::{
402            Editor, EntryKind,
403            editor::{Cursor, EditMode, RemoveMode, UpsertMode, WriteMode},
404        },
405    };
406
407    /// Cursor handling
408    impl<'a> Editor<'a> {
409        /// Turn ourselves as a cursor, which points to the same tree as the editor.
410        ///
411        /// This is useful if a method takes a [`Cursor`], not an [`Editor`].
412        pub fn to_cursor(&mut self) -> Cursor<'_, 'a> {
413            Cursor {
414                parent: self,
415                prefix: BString::default(),
416            }
417        }
418
419        /// Create a cursor at the given `rela_path`, which must be a tree or is turned into a tree as its own edit.
420        ///
421        /// The returned cursor will then allow applying edits to the tree at `rela_path` as root.
422        /// If `rela_path` is a single empty string, it is equivalent to using the current instance itself.
423        pub fn cursor_at<I, C>(&mut self, rela_path: I) -> Result<Cursor<'_, 'a>, super::Error>
424        where
425            I: IntoIterator<Item = C>,
426            C: AsRef<BStr>,
427        {
428            self.path_buf.borrow_mut().clear();
429            self.upsert_or_remove_at_pathbuf(
430                rela_path,
431                EditMode::Upsert(EntryKind::Tree, self.object_hash.null(), UpsertMode::AssureTreeOnly),
432            )?;
433            let prefix = self.path_buf.borrow_mut().clone();
434            Ok(Cursor {
435                prefix, /* set during the upsert call */
436                parent: self,
437            })
438        }
439    }
440
441    impl Cursor<'_, '_> {
442        /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written
443        /// to that point.
444        /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed.
445        /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly
446        /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory.
447        pub fn get<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
448        where
449            I: IntoIterator<Item = C>,
450            C: AsRef<BStr>,
451        {
452            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
453            self.parent.get_inner(rela_path)
454        }
455
456        /// Like [`Editor::upsert()`], but with the constraint of only editing in this cursor's tree.
457        pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, super::Error>
458        where
459            I: IntoIterator<Item = C>,
460            C: AsRef<BStr>,
461        {
462            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
463            self.parent
464                .upsert_or_remove_at_pathbuf(rela_path, EditMode::Upsert(kind, id, UpsertMode::Normal))?;
465            Ok(self)
466        }
467
468        /// Like [`Editor::remove()`], but with the constraint of only editing in this cursor's tree.
469        pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, super::Error>
470        where
471            I: IntoIterator<Item = C>,
472            C: AsRef<BStr>,
473        {
474            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
475            self.parent
476                .upsert_or_remove_at_pathbuf(rela_path, EditMode::Remove(RemoveMode::Any))?;
477            Ok(self)
478        }
479
480        /// Like [`Editor::remove_leaf()`], but with the constraint of only editing in this cursor's tree.
481        pub fn remove_leaf<I, C>(&mut self, rela_path: I) -> Result<&mut Self, super::Error>
482        where
483            I: IntoIterator<Item = C>,
484            C: AsRef<BStr>,
485        {
486            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
487            self.parent
488                .upsert_or_remove_at_pathbuf(rela_path, EditMode::Remove(RemoveMode::LeafOnly))?;
489            Ok(self)
490        }
491
492        /// Like [`Editor::write()`], but will write only the subtree of the cursor.
493        pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
494            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
495            self.parent.write_at_pathbuf(out, WriteMode::FromCursor)
496        }
497    }
498}
499
500#[derive(Copy, Clone, Eq, PartialEq)]
501enum UpsertMode {
502    Normal,
503    /// Only make sure there is a tree at the given location (requires kind tree and null-id)
504    AssureTreeOnly,
505}
506
507#[derive(Copy, Clone, Eq, PartialEq)]
508enum RemoveMode {
509    /// Remove any entry, including entire subtrees.
510    Any,
511    /// Only remove leaf entries, and reject an existing tree at the target path.
512    LeafOnly,
513}
514
515#[derive(Copy, Clone)]
516enum EditMode {
517    Remove(RemoveMode),
518    /// Insert or replace an entry of `kind` and `id` according to the given mode.
519    Upsert(EntryKind, ObjectId, UpsertMode),
520}
521
522enum WriteMode {
523    Normal,
524    /// Perform less cleanup to assure parent-editor still stays intact
525    FromCursor,
526}
527
528fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering {
529    let common = a.filename.len().min(filename.len());
530    a.filename[..common].cmp(&filename[..common]).then_with(|| {
531        let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
532        let b = filename.get(common).or_else(|| is_tree.then_some(&b'/'));
533        a.cmp(&b)
534    })
535}
536
537fn filename(path: &BStr) -> &BStr {
538    path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..])
539}
540
541fn empty_path() -> BString {
542    BString::default()
543}
544
545fn push_path_component(base: &mut BString, component: &[u8]) -> usize {
546    let prev_len = base.len();
547    debug_assert!(base.last() != Some(&b'/'));
548    if !base.is_empty() {
549        base.push_byte(b'/');
550    }
551    base.push_str(component);
552    prev_len
553}
554
555fn path_with_component(base: &BStr, component: &BStr) -> BString {
556    if base.is_empty() {
557        component.into()
558    } else {
559        let mut out = base.to_owned();
560        out.push_byte(b'/');
561        out.push_str(component);
562        out
563    }
564}