Skip to main content

gix_object/tree/
editor.rs

1use std::{
2    cmp::Ordering,
3    collections::{HashMap, hash_map},
4    fmt::Formatter,
5};
6
7use bstr::{BStr, BString, ByteSlice, ByteVec};
8use gix_hash::ObjectId;
9
10use crate::{
11    Tree, tree,
12    tree::{Editor, EntryKind},
13};
14
15/// A way to constrain all [tree-edits](Editor) to a given subtree.
16pub struct Cursor<'a, 'find> {
17    /// The underlying editor
18    parent: &'a mut Editor<'find>,
19    /// Our own location, used as prefix for all operations.
20    /// Note that it's assumed to always contain a tree.
21    prefix: BString,
22}
23
24impl std::fmt::Debug for Editor<'_> {
25    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
26        f.debug_struct("Editor")
27            .field("object_hash", &self.object_hash)
28            .field("path_buf", &self.path_buf)
29            .field("trees", &self.trees)
30            .finish()
31    }
32}
33
34/// The error returned by [Editor] or [Cursor] edit operation.
35#[derive(Debug, thiserror::Error)]
36#[allow(missing_docs)]
37pub enum Error {
38    #[error("Empty path components are not allowed")]
39    EmptyPathComponent,
40    #[error(transparent)]
41    FindExistingObject(#[from] crate::find::existing_object::Error),
42}
43
44/// Lifecycle
45impl<'a> Editor<'a> {
46    /// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing
47    /// trees when edits are made. Each tree will only be looked-up once and then edited in place from
48    /// that point on.
49    /// `object_hash` denotes the kind of hash to create.
50    pub fn new(root: Tree, find: &'a dyn crate::FindExt, object_hash: gix_hash::Kind) -> Self {
51        Editor {
52            find,
53            object_hash,
54            trees: HashMap::from_iter(Some((empty_path(), root))),
55            path_buf: BString::from(Vec::with_capacity(256)).into(),
56            tree_buf: Vec::with_capacity(512),
57        }
58    }
59}
60
61/// Operations
62impl Editor<'_> {
63    /// Write the entire in-memory state of all changed trees (and only changed trees) to `out`, and remove
64    /// written portions from our state except for the root tree, which affects [`get()`](Editor::get()).
65    /// Note that the returned object id *can* be the empty tree if everything was removed or if nothing
66    /// was added to the tree.
67    ///
68    /// The last call to `out` will be the changed root tree, whose object-id will also be returned.
69    /// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist.
70    /// We don't assure that as there is no validation that inserted entries are valid object ids.
71    ///
72    /// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the
73    /// just-written root-tree.
74    /// If this is not desired, use [set_root()](Self::set_root()).
75    ///
76    /// ### Validation
77    ///
78    /// Note that no additional validation is performed to assure correctness of entry-names.
79    /// It is absolutely and intentionally possible to write out invalid trees with this method.
80    /// Higher layers are expected to perform detailed validation.
81    pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
82        self.path_buf.borrow_mut().clear();
83        self.write_at_pathbuf(out, WriteMode::Normal)
84    }
85
86    /// Remove the entry at `rela_path`, loading all trees on the path accordingly.
87    /// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all.
88    ///
89    /// Note that trying to remove a path with an empty component is also forbidden.
90    pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, Error>
91    where
92        I: IntoIterator<Item = C>,
93        C: AsRef<BStr>,
94    {
95        self.path_buf.borrow_mut().clear();
96        self.upsert_or_remove_at_pathbuf(rela_path, None)
97    }
98
99    /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written
100    /// to that point.
101    /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed.
102    /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly
103    /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory.
104    pub fn get<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
105    where
106        I: IntoIterator<Item = C>,
107        C: AsRef<BStr>,
108    {
109        self.path_buf.borrow_mut().clear();
110        self.get_inner(rela_path)
111    }
112
113    /// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree,
114    /// like `a/b/c`. Names are matched case-sensitively.
115    ///
116    /// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database
117    /// or will be made available at a later point to assure the integrity of the produced tree.
118    ///
119    /// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries
120    /// will be inserted into them instead.
121    ///
122    /// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading
123    /// through them will not be considered a problem.
124    ///
125    /// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed
126    /// in Git trees.
127    pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, Error>
128    where
129        I: IntoIterator<Item = C>,
130        C: AsRef<BStr>,
131    {
132        self.path_buf.borrow_mut().clear();
133        self.upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))
134    }
135
136    fn get_inner<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
137    where
138        I: IntoIterator<Item = C>,
139        C: AsRef<BStr>,
140    {
141        let mut path_buf = self.path_buf.borrow_mut();
142        let mut cursor = self.trees.get(path_buf.as_bstr()).expect("root is always present");
143        let mut rela_path = rela_path.into_iter().peekable();
144        while let Some(name) = rela_path.next() {
145            let name = name.as_ref();
146            let is_last = rela_path.peek().is_none();
147            match cursor
148                .entries
149                .binary_search_by(|e| cmp_entry_with_name(e, name, true))
150                .or_else(|_| cursor.entries.binary_search_by(|e| cmp_entry_with_name(e, name, false)))
151            {
152                Ok(idx) if is_last => return Some(&cursor.entries[idx]),
153                Ok(idx) => {
154                    if cursor.entries[idx].mode.is_tree() {
155                        push_path_component(&mut path_buf, name);
156                        cursor = self.trees.get(path_buf.as_bstr())?;
157                    } else {
158                        break;
159                    }
160                }
161                Err(_) => break,
162            }
163        }
164        None
165    }
166
167    fn write_at_pathbuf<E>(
168        &mut self,
169        mut out: impl FnMut(&Tree) -> Result<ObjectId, E>,
170        mode: WriteMode,
171    ) -> Result<ObjectId, E> {
172        assert_ne!(self.trees.len(), 0, "there is at least the root tree");
173
174        // back is for children, front is for parents.
175        let path_buf = self.path_buf.borrow_mut();
176        let mut parents = vec![(
177            None::<usize>,
178            path_buf.clone(),
179            self.trees
180                .remove(path_buf.as_bstr())
181                .expect("root tree is always present"),
182        )];
183        let mut children = Vec::new();
184        while let Some((parent_idx, mut rela_path, mut tree)) = children.pop().or_else(|| parents.pop()) {
185            let mut all_entries_unchanged_or_written = true;
186            for entry in &tree.entries {
187                if entry.mode.is_tree() {
188                    let prev_len = push_path_component(&mut rela_path, &entry.filename);
189                    if let Some(sub_tree) = self.trees.remove(&rela_path) {
190                        all_entries_unchanged_or_written = false;
191                        let next_parent_idx = parents.len();
192                        children.push((Some(next_parent_idx), rela_path.clone(), sub_tree));
193                    }
194                    rela_path.truncate(prev_len);
195                }
196            }
197            if all_entries_unchanged_or_written {
198                tree.entries.retain(|e| !e.oid.is_null());
199                if let Some((_, _, parent_to_adjust)) =
200                    parent_idx.map(|idx| parents.get_mut(idx).expect("always present, pointing towards zero"))
201                {
202                    let name = filename(rela_path.as_bstr());
203                    let entry_idx = parent_to_adjust
204                        .entries
205                        .binary_search_by(|e| cmp_entry_with_name(e, name, true))
206                        .expect("the parent always knows us by name");
207                    if tree.entries.is_empty() {
208                        parent_to_adjust.entries.remove(entry_idx);
209                    } else {
210                        match out(&tree) {
211                            Ok(id) => {
212                                parent_to_adjust.entries[entry_idx].oid = id;
213                            }
214                            Err(err) => {
215                                let root_tree = parents.into_iter().next().expect("root wasn't consumed yet");
216                                self.trees.insert(root_tree.1, root_tree.2);
217                                return Err(err);
218                            }
219                        }
220                    }
221                } else if parents.is_empty() {
222                    debug_assert!(children.is_empty(), "we consume children before parents");
223                    debug_assert_eq!(rela_path, **path_buf, "this should always be the root tree");
224
225                    // There may be left-over trees if they are replaced with blobs for example.
226                    match out(&tree) {
227                        Ok(id) => {
228                            let root_tree_id = id;
229                            match mode {
230                                WriteMode::Normal => {
231                                    self.trees.clear();
232                                }
233                                WriteMode::FromCursor => {}
234                            }
235                            self.trees.insert(rela_path, tree);
236                            return Ok(root_tree_id);
237                        }
238                        Err(err) => {
239                            self.trees.insert(rela_path, tree);
240                            return Err(err);
241                        }
242                    }
243                } else if !tree.entries.is_empty() {
244                    out(&tree)?;
245                }
246            } else {
247                parents.push((parent_idx, rela_path, tree));
248            }
249        }
250
251        unreachable!("we exit as soon as everything is consumed")
252    }
253
254    fn upsert_or_remove_at_pathbuf<I, C>(
255        &mut self,
256        rela_path: I,
257        kind_and_id: Option<(EntryKind, ObjectId, UpsertMode)>,
258    ) -> Result<&mut Self, Error>
259    where
260        I: IntoIterator<Item = C>,
261        C: AsRef<BStr>,
262    {
263        let mut path_buf = self.path_buf.borrow_mut();
264        let mut cursor = self.trees.get_mut(path_buf.as_bstr()).expect("root is always present");
265        let mut rela_path = rela_path.into_iter().peekable();
266        let new_kind_is_tree = kind_and_id.is_some_and(|(kind, _, _)| kind == EntryKind::Tree);
267        while let Some(name) = rela_path.next() {
268            let name = name.as_ref();
269            if name.is_empty() {
270                return Err(Error::EmptyPathComponent);
271            }
272            let is_last = rela_path.peek().is_none();
273            let mut needs_sorting = false;
274            let current_level_must_be_tree = !is_last || new_kind_is_tree;
275            let check_type_change = |entry: &tree::Entry| entry.mode.is_tree() != current_level_must_be_tree;
276            let tree_to_lookup = match cursor
277                .entries
278                .binary_search_by(|e| cmp_entry_with_name(e, name, false))
279                .or_else(|file_insertion_idx| {
280                    cursor
281                        .entries
282                        .binary_search_by(|e| cmp_entry_with_name(e, name, true))
283                        .map_err(|dir_insertion_index| {
284                            if current_level_must_be_tree {
285                                dir_insertion_index
286                            } else {
287                                file_insertion_idx
288                            }
289                        })
290                }) {
291                Ok(idx) => {
292                    match kind_and_id {
293                        None => {
294                            if is_last {
295                                cursor.entries.remove(idx);
296                                break;
297                            } else {
298                                let entry = &cursor.entries[idx];
299                                if entry.mode.is_tree() {
300                                    Some(entry.oid)
301                                } else {
302                                    break;
303                                }
304                            }
305                        }
306                        Some((kind, id, _mode)) => {
307                            let entry = &mut cursor.entries[idx];
308                            if is_last {
309                                // unconditionally overwrite what's there.
310                                entry.oid = id;
311                                needs_sorting = check_type_change(entry);
312                                entry.mode = kind.into();
313                                None
314                            } else if entry.mode.is_tree() {
315                                // Possibly lookup the existing tree on our way down the path.
316                                Some(entry.oid)
317                            } else {
318                                // it is no tree, but we are traversing a path, so turn it into one.
319                                entry.oid = id.kind().null();
320                                needs_sorting = check_type_change(entry);
321                                entry.mode = EntryKind::Tree.into();
322                                None
323                            }
324                        }
325                    }
326                }
327                Err(insertion_idx) => match kind_and_id {
328                    None => break,
329                    Some((kind, id, _mode)) => {
330                        cursor.entries.insert(
331                            insertion_idx,
332                            tree::Entry {
333                                filename: name.into(),
334                                mode: if is_last { kind.into() } else { EntryKind::Tree.into() },
335                                oid: if is_last { id } else { id.kind().null() },
336                            },
337                        );
338                        None
339                    }
340                },
341            };
342            if needs_sorting {
343                cursor.entries.sort();
344            }
345            if is_last && kind_and_id.is_some_and(|(_, _, mode)| mode == UpsertMode::Normal) {
346                break;
347            }
348            push_path_component(&mut path_buf, name);
349            cursor = match self.trees.entry(path_buf.clone()) {
350                hash_map::Entry::Occupied(e) => e.into_mut(),
351                hash_map::Entry::Vacant(e) => e.insert(
352                    if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) {
353                        self.find.find_tree(&tree_id, &mut self.tree_buf)?.into()
354                    } else {
355                        Tree::default()
356                    },
357                ),
358            };
359        }
360        drop(path_buf);
361        Ok(self)
362    }
363
364    /// Set the root tree of the modification to `root`, assuring it has a well-known state.
365    ///
366    /// Note that this erases all previous edits.
367    ///
368    /// This is useful if the same editor is re-used for various trees.
369    pub fn set_root(&mut self, root: Tree) -> &mut Self {
370        self.trees.clear();
371        self.trees.insert(empty_path(), root);
372        self
373    }
374}
375
376mod cursor {
377    use bstr::{BStr, BString};
378    use gix_hash::ObjectId;
379
380    use crate::{
381        Tree, tree,
382        tree::{
383            Editor, EntryKind,
384            editor::{Cursor, UpsertMode, WriteMode},
385        },
386    };
387
388    /// Cursor handling
389    impl<'a> Editor<'a> {
390        /// Turn ourselves as a cursor, which points to the same tree as the editor.
391        ///
392        /// This is useful if a method takes a [`Cursor`], not an [`Editor`].
393        pub fn to_cursor(&mut self) -> Cursor<'_, 'a> {
394            Cursor {
395                parent: self,
396                prefix: BString::default(),
397            }
398        }
399
400        /// Create a cursor at the given `rela_path`, which must be a tree or is turned into a tree as its own edit.
401        ///
402        /// The returned cursor will then allow applying edits to the tree at `rela_path` as root.
403        /// If `rela_path` is a single empty string, it is equivalent to using the current instance itself.
404        pub fn cursor_at<I, C>(&mut self, rela_path: I) -> Result<Cursor<'_, 'a>, super::Error>
405        where
406            I: IntoIterator<Item = C>,
407            C: AsRef<BStr>,
408        {
409            self.path_buf.borrow_mut().clear();
410            self.upsert_or_remove_at_pathbuf(
411                rela_path,
412                Some((EntryKind::Tree, self.object_hash.null(), UpsertMode::AssureTreeOnly)),
413            )?;
414            let prefix = self.path_buf.borrow_mut().clone();
415            Ok(Cursor {
416                prefix, /* set during the upsert call */
417                parent: self,
418            })
419        }
420    }
421
422    impl Cursor<'_, '_> {
423        /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written
424        /// to that point.
425        /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed.
426        /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly
427        /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory.
428        pub fn get<I, C>(&self, rela_path: I) -> Option<&tree::Entry>
429        where
430            I: IntoIterator<Item = C>,
431            C: AsRef<BStr>,
432        {
433            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
434            self.parent.get_inner(rela_path)
435        }
436
437        /// Like [`Editor::upsert()`], but with the constraint of only editing in this cursor's tree.
438        pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, super::Error>
439        where
440            I: IntoIterator<Item = C>,
441            C: AsRef<BStr>,
442        {
443            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
444            self.parent
445                .upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))?;
446            Ok(self)
447        }
448
449        /// Like [`Editor::remove()`], but with the constraint of only editing in this cursor's tree.
450        pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, super::Error>
451        where
452            I: IntoIterator<Item = C>,
453            C: AsRef<BStr>,
454        {
455            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
456            self.parent.upsert_or_remove_at_pathbuf(rela_path, None)?;
457            Ok(self)
458        }
459
460        /// Like [`Editor::write()`], but will write only the subtree of the cursor.
461        pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
462            self.parent.path_buf.borrow_mut().clone_from(&self.prefix);
463            self.parent.write_at_pathbuf(out, WriteMode::FromCursor)
464        }
465    }
466}
467
468#[derive(Copy, Clone, Eq, PartialEq)]
469enum UpsertMode {
470    Normal,
471    /// Only make sure there is a tree at the given location (requires kind tree and null-id)
472    AssureTreeOnly,
473}
474
475enum WriteMode {
476    Normal,
477    /// Perform less cleanup to assure parent-editor still stays intact
478    FromCursor,
479}
480
481fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering {
482    let common = a.filename.len().min(filename.len());
483    a.filename[..common].cmp(&filename[..common]).then_with(|| {
484        let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
485        let b = filename.get(common).or_else(|| is_tree.then_some(&b'/'));
486        a.cmp(&b)
487    })
488}
489
490fn filename(path: &BStr) -> &BStr {
491    path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..])
492}
493
494fn empty_path() -> BString {
495    BString::default()
496}
497
498fn push_path_component(base: &mut BString, component: &[u8]) -> usize {
499    let prev_len = base.len();
500    debug_assert!(base.last() != Some(&b'/'));
501    if !base.is_empty() {
502        base.push_byte(b'/');
503    }
504    base.push_str(component);
505    prev_len
506}