gix_object/tree/
editor.rs

1use std::{
2    cmp::Ordering,
3    collections::{hash_map, HashMap},
4    fmt::Formatter,
5};
6
7use bstr::{BStr, BString, ByteSlice, ByteVec};
8use gix_hash::ObjectId;
9
10use crate::{
11    tree,
12    tree::{Editor, EntryKind},
13    Tree,
14};
15
16/// A way to constrain all [tree-edits](Editor) to a given subtree.
17pub struct Cursor<'a, 'find> {
18    /// The underlying editor
19    parent: &'a mut Editor<'find>,
20    /// Our own location, used as prefix for all operations.
21    /// Note that it's assumed to always contain a tree.
22    prefix: BString,
23}
24
25impl std::fmt::Debug for Editor<'_> {
26    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
27        f.debug_struct("Editor")
28            .field("object_hash", &self.object_hash)
29            .field("path_buf", &self.path_buf)
30            .field("trees", &self.trees)
31            .finish()
32    }
33}
34
35/// The error returned by [Editor] or [Cursor] edit operation.
36#[derive(Debug, thiserror::Error)]
37#[allow(missing_docs)]
38pub enum Error {
39    #[error("Empty path components are not allowed")]
40    EmptyPathComponent,
41    #[error(transparent)]
42    FindExistingObject(#[from] crate::find::existing_object::Error),
43}
44
45/// Lifecycle
46impl<'a> Editor<'a> {
47    /// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing
48    /// trees when edits are made. Each tree will only be looked-up once and then edited in place from
49    /// that point on.
50    /// `object_hash` denotes the kind of hash to create.
51    pub fn new(root: Tree, find: &'a dyn crate::FindExt, object_hash: gix_hash::Kind) -> Self {
52        Editor {
53            find,
54            object_hash,
55            trees: HashMap::from_iter(Some((empty_path(), root))),
56            path_buf: BString::from(Vec::with_capacity(256)).into(),
57            tree_buf: Vec::with_capacity(512),
58        }
59    }
60}
61
62/// Operations
63impl Editor<'_> {
64    /// Write the entire in-memory state of all changed trees (and only changed trees) to `out`, and remove
65    /// written portions from our state except for the root tree, which affects [`get()`](Editor::get()).
66    /// Note that the returned object id *can* be the empty tree if everything was removed or if nothing
67    /// was added to the tree.
68    ///
69    /// The last call to `out` will be the changed root tree, whose object-id will also be returned.
70    /// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist.
71    /// We don't assure that as there is no validation that inserted entries are valid object ids.
72    ///
73    /// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the
74    /// just-written root-tree.
75    /// If this is not desired, use [set_root()](Self::set_root()).
76    ///
77    /// ### Validation
78    ///
79    /// Note that no additional validation is performed to assure correctness of entry-names.
80    /// It is absolutely and intentionally possible to write out invalid trees with this method.
81    /// Higher layers are expected to perform detailed validation.
82    pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
83        self.path_buf.borrow_mut().clear();
84        self.write_at_pathbuf(out, WriteMode::Normal)
85    }
86
87    /// Remove the entry at `rela_path`, loading all trees on the path accordingly.
88    /// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all.
89    ///
90    /// Note that trying to remove a path with an empty component is also forbidden.
91    pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, Error>
92    where
93        I: IntoIterator<Item = C>,
94        C: AsRef<BStr>,
95    {
96        self.path_buf.borrow_mut().clear();
97        self.upsert_or_remove_at_pathbuf(rela_path, None)
98    }
99
100    /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written
101    /// to that point.
102    /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed.
103    /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly
104    /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory.
105    pub fn get<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
106    where
107        I: IntoIterator<Item = C>,
108        C: AsRef<BStr>,
109    {
110        self.path_buf.borrow_mut().clear();
111        self.get_inner(rela_path)
112    }
113
114    /// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree,
115    /// like `a/b/c`. Names are matched case-sensitively.
116    ///
117    /// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database
118    /// or will be made available at a later point to assure the integrity of the produced tree.
119    ///
120    /// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries
121    /// will be inserted into them instead.
122    ///
123    /// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading
124    /// through them will not be considered a problem.
125    ///
126    /// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed
127    /// in Git trees.
128    pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, Error>
129    where
130        I: IntoIterator<Item = C>,
131        C: AsRef<BStr>,
132    {
133        self.path_buf.borrow_mut().clear();
134        self.upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))
135    }
136
137    fn get_inner<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
138    where
139        I: IntoIterator<Item = C>,
140        C: AsRef<BStr>,
141    {
142        let mut path_buf = self.path_buf.borrow_mut();
143        let mut cursor = self.trees.get(path_buf.as_bstr()).expect("root is always present");
144        let mut rela_path = rela_path.into_iter().peekable();
145        while let Some(name) = rela_path.next() {
146            let name = name.as_ref();
147            let is_last = rela_path.peek().is_none();
148            match cursor
149                .entries
150                .binary_search_by(|e| cmp_entry_with_name(e, name, true))
151                .or_else(|_| cursor.entries.binary_search_by(|e| cmp_entry_with_name(e, name, false)))
152            {
153                Ok(idx) if is_last => return Some(&cursor.entries[idx]),
154                Ok(idx) => {
155                    if cursor.entries[idx].mode.is_tree() {
156                        push_path_component(&mut path_buf, name);
157                        cursor = self.trees.get(path_buf.as_bstr())?;
158                    } else {
159                        break;
160                    }
161                }
162                Err(_) => break,
163            }
164        }
165        None
166    }
167
168    fn write_at_pathbuf<E>(
169        &mut self,
170        mut out: impl FnMut(&Tree) -> Result<ObjectId, E>,
171        mode: WriteMode,
172    ) -> Result<ObjectId, E> {
173        assert_ne!(self.trees.len(), 0, "there is at least the root tree");
174
175        // back is for children, front is for parents.
176        let path_buf = self.path_buf.borrow_mut();
177        let mut parents = vec![(
178            None::<usize>,
179            path_buf.clone(),
180            self.trees
181                .remove(path_buf.as_bstr())
182                .expect("root tree is always present"),
183        )];
184        let mut children = Vec::new();
185        while let Some((parent_idx, mut rela_path, mut tree)) = children.pop().or_else(|| parents.pop()) {
186            let mut all_entries_unchanged_or_written = true;
187            for entry in &tree.entries {
188                if entry.mode.is_tree() {
189                    let prev_len = push_path_component(&mut rela_path, &entry.filename);
190                    if let Some(sub_tree) = self.trees.remove(&rela_path) {
191                        all_entries_unchanged_or_written = false;
192                        let next_parent_idx = parents.len();
193                        children.push((Some(next_parent_idx), rela_path.clone(), sub_tree));
194                    }
195                    rela_path.truncate(prev_len);
196                }
197            }
198            if all_entries_unchanged_or_written {
199                tree.entries.retain(|e| !e.oid.is_null());
200                if let Some((_, _, parent_to_adjust)) =
201                    parent_idx.map(|idx| parents.get_mut(idx).expect("always present, pointing towards zero"))
202                {
203                    let name = filename(rela_path.as_bstr());
204                    let entry_idx = parent_to_adjust
205                        .entries
206                        .binary_search_by(|e| cmp_entry_with_name(e, name, true))
207                        .expect("the parent always knows us by name");
208                    if tree.entries.is_empty() {
209                        parent_to_adjust.entries.remove(entry_idx);
210                    } else {
211                        match out(&tree) {
212                            Ok(id) => {
213                                parent_to_adjust.entries[entry_idx].oid = id;
214                            }
215                            Err(err) => {
216                                let root_tree = parents.into_iter().next().expect("root wasn't consumed yet");
217                                self.trees.insert(root_tree.1, root_tree.2);
218                                return Err(err);
219                            }
220                        }
221                    }
222                } else if parents.is_empty() {
223                    debug_assert!(children.is_empty(), "we consume children before parents");
224                    debug_assert_eq!(rela_path, **path_buf, "this should always be the root tree");
225
226                    // There may be left-over trees if they are replaced with blobs for example.
227                    match out(&tree) {
228                        Ok(id) => {
229                            let root_tree_id = id;
230                            match mode {
231                                WriteMode::Normal => {
232                                    self.trees.clear();
233                                }
234                                WriteMode::FromCursor => {}
235                            }
236                            self.trees.insert(rela_path, tree);
237                            return Ok(root_tree_id);
238                        }
239                        Err(err) => {
240                            self.trees.insert(rela_path, tree);
241                            return Err(err);
242                        }
243                    }
244                } else if !tree.entries.is_empty() {
245                    out(&tree)?;
246                }
247            } else {
248                parents.push((parent_idx, rela_path, tree));
249            }
250        }
251
252        unreachable!("we exit as soon as everything is consumed")
253    }
254
255    fn upsert_or_remove_at_pathbuf<I, C>(
256        &mut self,
257        rela_path: I,
258        kind_and_id: Option<(EntryKind, ObjectId, UpsertMode)>,
259    ) -> Result<&mut Self, Error>
260    where
261        I: IntoIterator<Item = C>,
262        C: AsRef<BStr>,
263    {
264        let mut path_buf = self.path_buf.borrow_mut();
265        let mut cursor = self.trees.get_mut(path_buf.as_bstr()).expect("root is always present");
266        let mut rela_path = rela_path.into_iter().peekable();
267        let new_kind_is_tree = kind_and_id.is_some_and(|(kind, _, _)| kind == EntryKind::Tree);
268        while let Some(name) = rela_path.next() {
269            let name = name.as_ref();
270            if name.is_empty() {
271                return Err(Error::EmptyPathComponent);
272            }
273            let is_last = rela_path.peek().is_none();
274            let mut needs_sorting = false;
275            let current_level_must_be_tree = !is_last || new_kind_is_tree;
276            let check_type_change = |entry: &tree::Entry| entry.mode.is_tree() != current_level_must_be_tree;
277            let tree_to_lookup = match cursor
278                .entries
279                .binary_search_by(|e| cmp_entry_with_name(e, name, false))
280                .or_else(|file_insertion_idx| {
281                    cursor
282                        .entries
283                        .binary_search_by(|e| cmp_entry_with_name(e, name, true))
284                        .map_err(|dir_insertion_index| {
285                            if current_level_must_be_tree {
286                                dir_insertion_index
287                            } else {
288                                file_insertion_idx
289                            }
290                        })
291                }) {
292                Ok(idx) => {
293                    match kind_and_id {
294                        None => {
295                            if is_last {
296                                cursor.entries.remove(idx);
297                                break;
298                            } else {
299                                let entry = &cursor.entries[idx];
300                                if entry.mode.is_tree() {
301                                    Some(entry.oid)
302                                } else {
303                                    break;
304                                }
305                            }
306                        }
307                        Some((kind, id, _mode)) => {
308                            let entry = &mut cursor.entries[idx];
309                            if is_last {
310                                // unconditionally overwrite what's there.
311                                entry.oid = id;
312                                needs_sorting = check_type_change(entry);
313                                entry.mode = kind.into();
314                                None
315                            } else if entry.mode.is_tree() {
316                                // Possibly lookup the existing tree on our way down the path.
317                                Some(entry.oid)
318                            } else {
319                                // it is no tree, but we are traversing a path, so turn it into one.
320                                entry.oid = id.kind().null();
321                                needs_sorting = check_type_change(entry);
322                                entry.mode = EntryKind::Tree.into();
323                                None
324                            }
325                        }
326                    }
327                }
328                Err(insertion_idx) => match kind_and_id {
329                    None => break,
330                    Some((kind, id, _mode)) => {
331                        cursor.entries.insert(
332                            insertion_idx,
333                            tree::Entry {
334                                filename: name.into(),
335                                mode: if is_last { kind.into() } else { EntryKind::Tree.into() },
336                                oid: if is_last { id } else { id.kind().null() },
337                            },
338                        );
339                        None
340                    }
341                },
342            };
343            if needs_sorting {
344                cursor.entries.sort();
345            }
346            if is_last && kind_and_id.is_some_and(|(_, _, mode)| mode == UpsertMode::Normal) {
347                break;
348            }
349            push_path_component(&mut path_buf, name);
350            cursor = match self.trees.entry(path_buf.clone()) {
351                hash_map::Entry::Occupied(e) => e.into_mut(),
352                hash_map::Entry::Vacant(e) => e.insert(
353                    if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) {
354                        self.find.find_tree(&tree_id, &mut self.tree_buf)?.into()
355                    } else {
356                        Tree::default()
357                    },
358                ),
359            };
360        }
361        drop(path_buf);
362        Ok(self)
363    }
364
365    /// Set the root tree of the modification to `root`, assuring it has a well-known state.
366    ///
367    /// Note that this erases all previous edits.
368    ///
369    /// This is useful if the same editor is re-used for various trees.
370    pub fn set_root(&mut self, root: Tree) -> &mut Self {
371        self.trees.clear();
372        self.trees.insert(empty_path(), root);
373        self
374    }
375}
376
377mod cursor {
378    use bstr::{BStr, BString};
379    use gix_hash::ObjectId;
380
381    use crate::{
382        tree,
383        tree::{
384            editor::{Cursor, UpsertMode, WriteMode},
385            Editor, EntryKind,
386        },
387        Tree,
388    };
389
390    /// Cursor handling
391    impl<'a> Editor<'a> {
392        /// Turn ourselves as a cursor, which points to the same tree as the editor.
393        ///
394        /// This is useful if a method takes a [`Cursor`], not an [`Editor`].
395        pub fn to_cursor(&mut self) -> Cursor<'_, 'a> {
396            Cursor {
397                parent: self,
398                prefix: BString::default(),
399            }
400        }
401
402        /// Create a cursor at the given `rela_path`, which must be a tree or is turned into a tree as its own edit.
403        ///
404        /// The returned cursor will then allow applying edits to the tree at `rela_path` as root.
405        /// If `rela_path` is a single empty string, it is equivalent to using the current instance itself.
406        pub fn cursor_at<I, C>(&mut self, rela_path: I) -> Result<Cursor<'_, 'a>, super::Error>
407        where
408            I: IntoIterator<Item = C>,
409            C: AsRef<BStr>,
410        {
411            self.path_buf.borrow_mut().clear();
412            self.upsert_or_remove_at_pathbuf(
413                rela_path,
414                Some((EntryKind::Tree, self.object_hash.null(), UpsertMode::AssureTreeOnly)),
415            )?;
416            let prefix = self.path_buf.borrow_mut().clone();
417            Ok(Cursor {
418                prefix, /* set during the upsert call */
419                parent: self,
420            })
421        }
422    }
423
424    impl Cursor<'_, '_> {
425        /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written
426        /// to that point.
427        /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed.
428        /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly
429        /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory.
430        pub fn get<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
431        where
432            I: IntoIterator<Item = C>,
433            C: AsRef<BStr>,
434        {
435            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
436            self.parent.get_inner(rela_path)
437        }
438
439        /// Like [`Editor::upsert()`], but with the constraint of only editing in this cursor's tree.
440        pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, super::Error>
441        where
442            I: IntoIterator<Item = C>,
443            C: AsRef<BStr>,
444        {
445            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
446            self.parent
447                .upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))?;
448            Ok(self)
449        }
450
451        /// Like [`Editor::remove()`], but with the constraint of only editing in this cursor's tree.
452        pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, super::Error>
453        where
454            I: IntoIterator<Item = C>,
455            C: AsRef<BStr>,
456        {
457            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
458            self.parent.upsert_or_remove_at_pathbuf(rela_path, None)?;
459            Ok(self)
460        }
461
462        /// Like [`Editor::write()`], but will write only the subtree of the cursor.
463        pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
464            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
465            self.parent.write_at_pathbuf(out, WriteMode::FromCursor)
466        }
467    }
468}
469
470#[derive(Copy, Clone, Eq, PartialEq)]
471enum UpsertMode {
472    Normal,
473    /// Only make sure there is a tree at the given location (requires kind tree and null-id)
474    AssureTreeOnly,
475}
476
477enum WriteMode {
478    Normal,
479    /// Perform less cleanup to assure parent-editor still stays intact
480    FromCursor,
481}
482
483fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering {
484    let common = a.filename.len().min(filename.len());
485    a.filename[..common].cmp(&filename[..common]).then_with(|| {
486        let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
487        let b = filename.get(common).or_else(|| is_tree.then_some(&b'/'));
488        a.cmp(&b)
489    })
490}
491
492fn filename(path: &BStr) -> &BStr {
493    path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..])
494}
495
496fn empty_path() -> BString {
497    BString::default()
498}
499
500fn push_path_component(base: &mut BString, component: &[u8]) -> usize {
501    let prev_len = base.len();
502    debug_assert!(base.last() != Some(&b'/'));
503    if !base.is_empty() {
504        base.push_byte(b'/');
505    }
506    base.push_str(component);
507    prev_len
508}