Skip to main content

gix_object/tree/
ref_iter.rs

1use std::ops::ControlFlow;
2
3use bstr::BStr;
4
5use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter};
6
7/// Advance a path lookup by matching the next path component against `tree`.
8///
9/// `components` must yield the remaining path components to resolve, and `tree` must be the
10/// current object to search in.
11///
12/// The return value indicates how the caller should proceed:
13///
14/// - [`ControlFlow::Continue`] contains the object id of the matched entry when there are more
15///   components left to resolve. Callers should load that object and pass it back into a subsequent
16///   invocation.
17/// - [`ControlFlow::Break`]`(Some(entry))` contains the matched entry for the final component, and
18///   signals that lookup completed successfully.
19/// - [`ControlFlow::Break`]`(None)` signals that lookup cannot continue and should stop without a
20///   match. This happens if `tree` is not a tree object, if `components` is already exhausted, or if
21///   the next component is not present in `tree`.
22///
23/// Note that this behaviour is tuned to prefer to exhaust the entire chain of `components`, only the
24/// last component can yield a [`ControlFlow::Break`].
25pub fn next_entry<'a, I, P>(
26    components: &mut core::iter::Peekable<I>,
27    tree: crate::Data<'a>,
28) -> core::ops::ControlFlow<Option<EntryRef<'a>>, gix_hash::ObjectId>
29where
30    I: Iterator<Item = P>,
31    P: PartialEq<BStr>,
32{
33    if !tree.kind.is_tree() {
34        return ControlFlow::Break(None);
35    }
36
37    let Some(component) = components.next() else {
38        return ControlFlow::Break(None);
39    };
40
41    let Some(entry) = TreeRefIter::from_bytes(tree.data, tree.hash_kind)
42        .filter_map(Result::ok)
43        .find(|entry| component.eq(entry.filename))
44    else {
45        return ControlFlow::Break(None);
46    };
47
48    if components.peek().is_none() {
49        ControlFlow::Break(Some(entry))
50    } else {
51        ControlFlow::Continue(entry.oid.to_owned())
52    }
53}
54
55impl<'a> TreeRefIter<'a> {
56    /// Instantiate an iterator from the given tree `data` and `hash_kind`.
57    pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> TreeRefIter<'a> {
58        TreeRefIter { data, hash_kind }
59    }
60
61    /// Follow a sequence of `path` components starting from this instance, and look them up in `odb` one by one using `buffer`
62    /// until the last component is looked up and its tree entry is returned.
63    ///
64    /// # Performance Notes
65    ///
66    /// Searching tree entries is currently done in sequence, which allows the search to be allocation free. It would be possible
67    /// to reuse a vector and use a binary search instead, which might be able to improve performance over all.
68    /// However, a benchmark should be created first to have some data and see which trade-off to choose here.
69    pub fn lookup_entry<I, P>(
70        &self,
71        odb: impl crate::Find,
72        buffer: &'a mut Vec<u8>,
73        path: I,
74    ) -> Result<Option<tree::Entry>, crate::find::Error>
75    where
76        I: IntoIterator<Item = P>,
77        P: PartialEq<BStr>,
78    {
79        buffer.clear();
80        buffer.extend_from_slice(self.data);
81
82        let mut iter = path.into_iter().peekable();
83        let mut data = crate::Data::new(buffer, crate::Kind::Tree, self.hash_kind);
84
85        loop {
86            data = match next_entry(&mut iter, data) {
87                ControlFlow::Continue(oid) => {
88                    let Some(next_tree) = odb.try_find(&oid, buffer)? else {
89                        break Ok(None);
90                    };
91                    next_tree
92                }
93                ControlFlow::Break(v) => break Ok(v.map(Into::into)),
94            }
95        }
96    }
97
98    /// Like [`Self::lookup_entry()`], but takes any [`AsRef<Path>`](`std::path::Path`) directly via `relative_path`,
99    /// a path relative to this tree.
100    /// `odb` and `buffer` are used to lookup intermediate trees.
101    ///
102    /// # Note
103    ///
104    /// If any path component contains illformed UTF-8 and thus can't be converted to bytes on platforms which can't do so natively,
105    /// the returned component will be empty which makes the lookup fail.
106    pub fn lookup_entry_by_path(
107        &self,
108        odb: impl crate::Find,
109        buffer: &'a mut Vec<u8>,
110        relative_path: impl AsRef<std::path::Path>,
111    ) -> Result<Option<tree::Entry>, crate::find::Error> {
112        self.lookup_entry(
113            odb,
114            buffer,
115            relative_path
116                .as_ref()
117                .components()
118                .map(|c| c.as_os_str().as_encoded_bytes()),
119        )
120    }
121}
122
123impl<'a> TreeRef<'a> {
124    /// Deserialize a Tree from `data`, assuming `hash_kind` to determine how the object ids are encoded in this particular tree.
125    pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> Result<TreeRef<'a>, crate::decode::Error> {
126        decode::tree(data, hash_kind.len_in_bytes())
127    }
128
129    /// Find an entry named `name` knowing if the entry is a directory or not, using a binary search.
130    ///
131    /// Note that it's impossible to binary search by name alone as the sort order is special.
132    pub fn bisect_entry(&self, name: &BStr, is_dir: bool) -> Option<EntryRef<'a>> {
133        static NULL_HASH: gix_hash::ObjectId = gix_hash::Kind::shortest().null();
134
135        let search = EntryRef {
136            mode: if is_dir {
137                tree::EntryKind::Tree
138            } else {
139                tree::EntryKind::Blob
140            }
141            .into(),
142            filename: name,
143            oid: &NULL_HASH,
144        };
145        self.entries
146            .binary_search_by(|e| e.cmp(&search))
147            .ok()
148            .map(|idx| self.entries[idx])
149    }
150
151    /// Create an instance of the empty tree.
152    ///
153    /// It's particularly useful as static part of a program.
154    pub const fn empty() -> TreeRef<'static> {
155        TreeRef { entries: Vec::new() }
156    }
157}
158
159impl<'a> TreeRefIter<'a> {
160    /// Consume self and return all parsed entries.
161    pub fn entries(self) -> Result<Vec<EntryRef<'a>>, crate::decode::Error> {
162        self.collect()
163    }
164
165    /// Return the offset in bytes that our data advanced from `buf`, the original buffer
166    /// to the beginning of the data of the tree.
167    ///
168    /// Then the tree-iteration can be resumed at the entry that would otherwise be returned next.
169    pub fn offset_to_next_entry(&self, buf: &[u8]) -> usize {
170        let before = (*buf).as_ptr();
171        let after = (*self.data).as_ptr();
172
173        debug_assert!(
174            before <= after,
175            "`TreeRefIter::offset_to_next_entry(): {after:?} <= {before:?}) violated"
176        );
177        (after as usize - before as usize) / std::mem::size_of::<u8>()
178    }
179}
180
181impl<'a> Iterator for TreeRefIter<'a> {
182    type Item = Result<EntryRef<'a>, crate::decode::Error>;
183
184    fn next(&mut self) -> Option<Self::Item> {
185        if self.data.is_empty() {
186            return None;
187        }
188        match decode::fast_entry(self.data, self.hash_kind.len_in_bytes()) {
189            Some((data_left, entry)) => {
190                self.data = data_left;
191                Some(Ok(entry))
192            }
193            None => {
194                self.data = &[];
195                Some(Err(crate::decode::Error))
196            }
197        }
198    }
199}
200
201impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
202    type Error = &'a [u8];
203
204    fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> {
205        tree::EntryMode::from_bytes(mode).ok_or(mode)
206    }
207}
208
209mod decode {
210    use bstr::ByteSlice;
211
212    use crate::{tree, tree::EntryRef, TreeRef};
213
214    pub fn fast_entry(i: &[u8], hash_len: usize) -> Option<(&[u8], EntryRef<'_>)> {
215        let (mode, i) = tree::EntryMode::extract_from_bytes(i)?;
216        let (filename, i) = i.split_at(i.find_byte(0)?);
217        let i = &i[1..];
218        let (oid, i) = match i.len() {
219            len if len < hash_len => return None,
220            _ => i.split_at(hash_len),
221        };
222        Some((
223            i,
224            EntryRef {
225                mode,
226                filename: filename.as_bstr(),
227                oid: gix_hash::oid::try_from_bytes(oid)
228                    .unwrap_or_else(|_| panic!("we counted exactly {hash_len} bytes")),
229            },
230        ))
231    }
232
233    pub fn tree(data: &[u8], hash_len: usize) -> Result<TreeRef<'_>, crate::decode::Error> {
234        let mut i = data;
235
236        // Calculate an estimate of the amount of entries to reduce
237        // the amount of allocations necessary.
238        // Note that this assumes that we want speed over fitting Vecs, this is a trade-off.
239        const AVERAGE_FILENAME_LEN: usize = 24;
240        const AVERAGE_MODE_LEN: usize = 6;
241        const ENTRY_DELIMITER_LEN: usize = 2; // space + trailing zero
242        const AVERAGE_TREE_ENTRIES: usize = 16 * 2; // prevent overallocation beyond what's meaningful or what could be dangerous
243        let average_entry_len = ENTRY_DELIMITER_LEN + hash_len + AVERAGE_MODE_LEN + AVERAGE_FILENAME_LEN;
244        let upper_bound = i.len() / average_entry_len;
245        let mut out = Vec::with_capacity(upper_bound.min(AVERAGE_TREE_ENTRIES));
246
247        while !i.is_empty() {
248            let Some((rest, entry)) = fast_entry(i, hash_len) else {
249                return Err(crate::decode::Error);
250            };
251            i = rest;
252            out.push(entry);
253        }
254        Ok(TreeRef { entries: out })
255    }
256}