gix_object/tree/
editor.rs

1use crate::tree::{Editor, EntryKind};
2use crate::{tree, Tree};
3use bstr::{BStr, BString, ByteSlice, ByteVec};
4use gix_hash::ObjectId;
5use std::cmp::Ordering;
6use std::collections::{hash_map, HashMap};
7use std::fmt::Formatter;
8
9/// A way to constrain all [tree-edits](Editor) to a given subtree.
10pub struct Cursor<'a, 'find> {
11    /// The underlying editor
12    parent: &'a mut Editor<'find>,
13    /// Our own location, used as prefix for all operations.
14    /// Note that it's assumed to always contain a tree.
15    prefix: BString,
16}
17
18impl std::fmt::Debug for Editor<'_> {
19    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
20        f.debug_struct("Editor")
21            .field("object_hash", &self.object_hash)
22            .field("path_buf", &self.path_buf)
23            .field("trees", &self.trees)
24            .finish()
25    }
26}
27
28/// The error returned by [Editor] or [Cursor] edit operation.
29#[derive(Debug, thiserror::Error)]
30#[allow(missing_docs)]
31pub enum Error {
32    #[error("Empty path components are not allowed")]
33    EmptyPathComponent,
34    #[error(transparent)]
35    FindExistingObject(#[from] crate::find::existing_object::Error),
36}
37
38/// Lifecycle
39impl<'a> Editor<'a> {
40    /// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing
41    /// trees when edits are made. Each tree will only be looked-up once and then edited in place from
42    /// that point on.
43    /// `object_hash` denotes the kind of hash to create.
44    pub fn new(root: Tree, find: &'a dyn crate::FindExt, object_hash: gix_hash::Kind) -> Self {
45        Editor {
46            find,
47            object_hash,
48            trees: HashMap::from_iter(Some((empty_path(), root))),
49            path_buf: BString::from(Vec::with_capacity(256)).into(),
50            tree_buf: Vec::with_capacity(512),
51        }
52    }
53}
54
55/// Operations
56impl Editor<'_> {
57    /// Write the entire in-memory state of all changed trees (and only changed trees) to `out`, and remove
58    /// written portions from our state except for the root tree, which affects [`get()`](Editor::get()).
59    /// Note that the returned object id *can* be the empty tree if everything was removed or if nothing
60    /// was added to the tree.
61    ///
62    /// The last call to `out` will be the changed root tree, whose object-id will also be returned.
63    /// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist.
64    /// We don't assure that as there is no validation that inserted entries are valid object ids.
65    ///
66    /// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the
67    /// just-written root-tree.
68    /// If this is not desired, use [set_root()](Self::set_root()).
69    ///
70    /// ### Validation
71    ///
72    /// Note that no additional validation is performed to assure correctness of entry-names.
73    /// It is absolutely and intentionally possible to write out invalid trees with this method.
74    /// Higher layers are expected to perform detailed validation.
75    pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
76        self.path_buf.borrow_mut().clear();
77        self.write_at_pathbuf(out, WriteMode::Normal)
78    }
79
80    /// Remove the entry at `rela_path`, loading all trees on the path accordingly.
81    /// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all.
82    ///
83    /// Note that trying to remove a path with an empty component is also forbidden.
84    pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, Error>
85    where
86        I: IntoIterator<Item = C>,
87        C: AsRef<BStr>,
88    {
89        self.path_buf.borrow_mut().clear();
90        self.upsert_or_remove_at_pathbuf(rela_path, None)
91    }
92
93    /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written
94    /// to that point.
95    /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed.
96    /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly
97    /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory.
98    pub fn get<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
99    where
100        I: IntoIterator<Item = C>,
101        C: AsRef<BStr>,
102    {
103        self.path_buf.borrow_mut().clear();
104        self.get_inner(rela_path)
105    }
106
107    /// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree,
108    /// like `a/b/c`. Names are matched case-sensitively.
109    ///
110    /// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database
111    /// or will be made available at a later point to assure the integrity of the produced tree.
112    ///
113    /// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries
114    /// will be inserted into them instead.
115    ///
116    /// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading
117    /// through them will not be considered a problem.
118    ///
119    /// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed
120    /// in Git trees.
121    pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, Error>
122    where
123        I: IntoIterator<Item = C>,
124        C: AsRef<BStr>,
125    {
126        self.path_buf.borrow_mut().clear();
127        self.upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))
128    }
129
130    fn get_inner<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
131    where
132        I: IntoIterator<Item = C>,
133        C: AsRef<BStr>,
134    {
135        let mut path_buf = self.path_buf.borrow_mut();
136        let mut cursor = self.trees.get(path_buf.as_bstr()).expect("root is always present");
137        let mut rela_path = rela_path.into_iter().peekable();
138        while let Some(name) = rela_path.next() {
139            let name = name.as_ref();
140            let is_last = rela_path.peek().is_none();
141            match cursor
142                .entries
143                .binary_search_by(|e| cmp_entry_with_name(e, name, true))
144                .or_else(|_| cursor.entries.binary_search_by(|e| cmp_entry_with_name(e, name, false)))
145            {
146                Ok(idx) if is_last => return Some(&cursor.entries[idx]),
147                Ok(idx) => {
148                    if cursor.entries[idx].mode.is_tree() {
149                        push_path_component(&mut path_buf, name);
150                        cursor = self.trees.get(path_buf.as_bstr())?;
151                    } else {
152                        break;
153                    }
154                }
155                Err(_) => break,
156            };
157        }
158        None
159    }
160
161    fn write_at_pathbuf<E>(
162        &mut self,
163        mut out: impl FnMut(&Tree) -> Result<ObjectId, E>,
164        mode: WriteMode,
165    ) -> Result<ObjectId, E> {
166        assert_ne!(self.trees.len(), 0, "there is at least the root tree");
167
168        // back is for children, front is for parents.
169        let path_buf = self.path_buf.borrow_mut();
170        let mut parents = vec![(
171            None::<usize>,
172            path_buf.clone(),
173            self.trees
174                .remove(path_buf.as_bstr())
175                .expect("root tree is always present"),
176        )];
177        let mut children = Vec::new();
178        while let Some((parent_idx, mut rela_path, mut tree)) = children.pop().or_else(|| parents.pop()) {
179            let mut all_entries_unchanged_or_written = true;
180            for entry in &tree.entries {
181                if entry.mode.is_tree() {
182                    let prev_len = push_path_component(&mut rela_path, &entry.filename);
183                    if let Some(sub_tree) = self.trees.remove(&rela_path) {
184                        all_entries_unchanged_or_written = false;
185                        let next_parent_idx = parents.len();
186                        children.push((Some(next_parent_idx), rela_path.clone(), sub_tree));
187                    }
188                    rela_path.truncate(prev_len);
189                }
190            }
191            if all_entries_unchanged_or_written {
192                tree.entries.retain(|e| !e.oid.is_null());
193                if let Some((_, _, parent_to_adjust)) =
194                    parent_idx.map(|idx| parents.get_mut(idx).expect("always present, pointing towards zero"))
195                {
196                    let name = filename(rela_path.as_bstr());
197                    let entry_idx = parent_to_adjust
198                        .entries
199                        .binary_search_by(|e| cmp_entry_with_name(e, name, true))
200                        .expect("the parent always knows us by name");
201                    if tree.entries.is_empty() {
202                        parent_to_adjust.entries.remove(entry_idx);
203                    } else {
204                        match out(&tree) {
205                            Ok(id) => {
206                                parent_to_adjust.entries[entry_idx].oid = id;
207                            }
208                            Err(err) => {
209                                let root_tree = parents.into_iter().next().expect("root wasn't consumed yet");
210                                self.trees.insert(root_tree.1, root_tree.2);
211                                return Err(err);
212                            }
213                        }
214                    }
215                } else if parents.is_empty() {
216                    debug_assert!(children.is_empty(), "we consume children before parents");
217                    debug_assert_eq!(rela_path, **path_buf, "this should always be the root tree");
218
219                    // There may be left-over trees if they are replaced with blobs for example.
220                    match out(&tree) {
221                        Ok(id) => {
222                            let root_tree_id = id;
223                            match mode {
224                                WriteMode::Normal => {
225                                    self.trees.clear();
226                                }
227                                WriteMode::FromCursor => {}
228                            }
229                            self.trees.insert(rela_path, tree);
230                            return Ok(root_tree_id);
231                        }
232                        Err(err) => {
233                            self.trees.insert(rela_path, tree);
234                            return Err(err);
235                        }
236                    }
237                } else if !tree.entries.is_empty() {
238                    out(&tree)?;
239                }
240            } else {
241                parents.push((parent_idx, rela_path, tree));
242            }
243        }
244
245        unreachable!("we exit as soon as everything is consumed")
246    }
247
248    fn upsert_or_remove_at_pathbuf<I, C>(
249        &mut self,
250        rela_path: I,
251        kind_and_id: Option<(EntryKind, ObjectId, UpsertMode)>,
252    ) -> Result<&mut Self, Error>
253    where
254        I: IntoIterator<Item = C>,
255        C: AsRef<BStr>,
256    {
257        let mut path_buf = self.path_buf.borrow_mut();
258        let mut cursor = self.trees.get_mut(path_buf.as_bstr()).expect("root is always present");
259        let mut rela_path = rela_path.into_iter().peekable();
260        let new_kind_is_tree = kind_and_id.is_some_and(|(kind, _, _)| kind == EntryKind::Tree);
261        while let Some(name) = rela_path.next() {
262            let name = name.as_ref();
263            if name.is_empty() {
264                return Err(Error::EmptyPathComponent);
265            }
266            let is_last = rela_path.peek().is_none();
267            let mut needs_sorting = false;
268            let current_level_must_be_tree = !is_last || new_kind_is_tree;
269            let check_type_change = |entry: &tree::Entry| entry.mode.is_tree() != current_level_must_be_tree;
270            let tree_to_lookup = match cursor
271                .entries
272                .binary_search_by(|e| cmp_entry_with_name(e, name, false))
273                .or_else(|file_insertion_idx| {
274                    cursor
275                        .entries
276                        .binary_search_by(|e| cmp_entry_with_name(e, name, true))
277                        .map_err(|dir_insertion_index| {
278                            if current_level_must_be_tree {
279                                dir_insertion_index
280                            } else {
281                                file_insertion_idx
282                            }
283                        })
284                }) {
285                Ok(idx) => {
286                    match kind_and_id {
287                        None => {
288                            if is_last {
289                                cursor.entries.remove(idx);
290                                break;
291                            } else {
292                                let entry = &cursor.entries[idx];
293                                if entry.mode.is_tree() {
294                                    Some(entry.oid)
295                                } else {
296                                    break;
297                                }
298                            }
299                        }
300                        Some((kind, id, _mode)) => {
301                            let entry = &mut cursor.entries[idx];
302                            if is_last {
303                                // unconditionally overwrite what's there.
304                                entry.oid = id;
305                                needs_sorting = check_type_change(entry);
306                                entry.mode = kind.into();
307                                None
308                            } else if entry.mode.is_tree() {
309                                // Possibly lookup the existing tree on our way down the path.
310                                Some(entry.oid)
311                            } else {
312                                // it is no tree, but we are traversing a path, so turn it into one.
313                                entry.oid = id.kind().null();
314                                needs_sorting = check_type_change(entry);
315                                entry.mode = EntryKind::Tree.into();
316                                None
317                            }
318                        }
319                    }
320                }
321                Err(insertion_idx) => match kind_and_id {
322                    None => break,
323                    Some((kind, id, _mode)) => {
324                        cursor.entries.insert(
325                            insertion_idx,
326                            tree::Entry {
327                                filename: name.into(),
328                                mode: if is_last { kind.into() } else { EntryKind::Tree.into() },
329                                oid: if is_last { id } else { id.kind().null() },
330                            },
331                        );
332                        None
333                    }
334                },
335            };
336            if needs_sorting {
337                cursor.entries.sort();
338            }
339            if is_last && kind_and_id.is_some_and(|(_, _, mode)| mode == UpsertMode::Normal) {
340                break;
341            }
342            push_path_component(&mut path_buf, name);
343            cursor = match self.trees.entry(path_buf.clone()) {
344                hash_map::Entry::Occupied(e) => e.into_mut(),
345                hash_map::Entry::Vacant(e) => e.insert(
346                    if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) {
347                        self.find.find_tree(&tree_id, &mut self.tree_buf)?.into()
348                    } else {
349                        Tree::default()
350                    },
351                ),
352            };
353        }
354        drop(path_buf);
355        Ok(self)
356    }
357
358    /// Set the root tree of the modification to `root`, assuring it has a well-known state.
359    ///
360    /// Note that this erases all previous edits.
361    ///
362    /// This is useful if the same editor is re-used for various trees.
363    pub fn set_root(&mut self, root: Tree) -> &mut Self {
364        self.trees.clear();
365        self.trees.insert(empty_path(), root);
366        self
367    }
368}
369
370mod cursor {
371    use crate::tree::editor::{Cursor, UpsertMode, WriteMode};
372    use crate::tree::{Editor, EntryKind};
373    use crate::{tree, Tree};
374    use bstr::{BStr, BString};
375    use gix_hash::ObjectId;
376
377    /// Cursor handling
378    impl<'a> Editor<'a> {
379        /// Turn ourselves as a cursor, which points to the same tree as the editor.
380        ///
381        /// This is useful if a method takes a [`Cursor`], not an [`Editor`].
382        pub fn to_cursor(&mut self) -> Cursor<'_, 'a> {
383            Cursor {
384                parent: self,
385                prefix: BString::default(),
386            }
387        }
388
389        /// Create a cursor at the given `rela_path`, which must be a tree or is turned into a tree as its own edit.
390        ///
391        /// The returned cursor will then allow applying edits to the tree at `rela_path` as root.
392        /// If `rela_path` is a single empty string, it is equivalent to using the current instance itself.
393        pub fn cursor_at<I, C>(&mut self, rela_path: I) -> Result<Cursor<'_, 'a>, super::Error>
394        where
395            I: IntoIterator<Item = C>,
396            C: AsRef<BStr>,
397        {
398            self.path_buf.borrow_mut().clear();
399            self.upsert_or_remove_at_pathbuf(
400                rela_path,
401                Some((EntryKind::Tree, self.object_hash.null(), UpsertMode::AssureTreeOnly)),
402            )?;
403            let prefix = self.path_buf.borrow_mut().clone();
404            Ok(Cursor {
405                prefix, /* set during the upsert call */
406                parent: self,
407            })
408        }
409    }
410
411    impl Cursor<'_, '_> {
412        /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written
413        /// to that point.
414        /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed.
415        /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly
416        /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory.
417        pub fn get<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
418        where
419            I: IntoIterator<Item = C>,
420            C: AsRef<BStr>,
421        {
422            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
423            self.parent.get_inner(rela_path)
424        }
425
426        /// Like [`Editor::upsert()`], but with the constraint of only editing in this cursor's tree.
427        pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, super::Error>
428        where
429            I: IntoIterator<Item = C>,
430            C: AsRef<BStr>,
431        {
432            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
433            self.parent
434                .upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))?;
435            Ok(self)
436        }
437
438        /// Like [`Editor::remove()`], but with the constraint of only editing in this cursor's tree.
439        pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, super::Error>
440        where
441            I: IntoIterator<Item = C>,
442            C: AsRef<BStr>,
443        {
444            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
445            self.parent.upsert_or_remove_at_pathbuf(rela_path, None)?;
446            Ok(self)
447        }
448
449        /// Like [`Editor::write()`], but will write only the subtree of the cursor.
450        pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
451            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
452            self.parent.write_at_pathbuf(out, WriteMode::FromCursor)
453        }
454    }
455}
456
457#[derive(Copy, Clone, Eq, PartialEq)]
458enum UpsertMode {
459    Normal,
460    /// Only make sure there is a tree at the given location (requires kind tree and null-id)
461    AssureTreeOnly,
462}
463
464enum WriteMode {
465    Normal,
466    /// Perform less cleanup to assure parent-editor still stays intact
467    FromCursor,
468}
469
470fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering {
471    let common = a.filename.len().min(filename.len());
472    a.filename[..common].cmp(&filename[..common]).then_with(|| {
473        let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
474        let b = filename.get(common).or_else(|| is_tree.then_some(&b'/'));
475        a.cmp(&b)
476    })
477}
478
479fn filename(path: &BStr) -> &BStr {
480    path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..])
481}
482
483fn empty_path() -> BString {
484    BString::default()
485}
486
487fn push_path_component(base: &mut BString, component: &[u8]) -> usize {
488    let prev_len = base.len();
489    debug_assert!(base.last() != Some(&b'/'));
490    if !base.is_empty() {
491        base.push_byte(b'/');
492    }
493    base.push_str(component);
494    prev_len
495}