Skip to main content

gix_object/tree/
ref_iter.rs

1use std::ops::ControlFlow;
2
3use bstr::BStr;
4use winnow::error::ParserError;
5
6use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter};
7
8/// Advance a path lookup by matching the next path component against `tree`.
9///
10/// `components` must yield the remaining path components to resolve, and `tree` must be the
11/// current object to search in.
12///
13/// The return value indicates how the caller should proceed:
14///
15/// - [`ControlFlow::Continue`] contains the object id of the matched entry when there are more
16///   components left to resolve. Callers should load that object and pass it back into a subsequent
17///   invocation.
18/// - [`ControlFlow::Break`]`(Some(entry))` contains the matched entry for the final component, and
19///   signals that lookup completed successfully.
20/// - [`ControlFlow::Break`]`(None)` signals that lookup cannot continue and should stop without a
21///   match. This happens if `tree` is not a tree object, if `components` is already exhausted, or if
22///   the next component is not present in `tree`.
23///
24/// Note that this behaviour is tuned to prefer to exhaust the entire chain of `components`, only the
25/// last component can yield a [`ControlFlow::Break`].
26pub fn next_entry<'a, I, P>(
27    components: &mut core::iter::Peekable<I>,
28    tree: crate::Data<'a>,
29) -> core::ops::ControlFlow<Option<EntryRef<'a>>, gix_hash::ObjectId>
30where
31    I: Iterator<Item = P>,
32    P: PartialEq<BStr>,
33{
34    if !tree.kind.is_tree() {
35        return ControlFlow::Break(None);
36    }
37
38    let Some(component) = components.next() else {
39        return ControlFlow::Break(None);
40    };
41
42    let Some(entry) = TreeRefIter::from_bytes(tree.data, tree.hash_kind)
43        .filter_map(Result::ok)
44        .find(|entry| component.eq(entry.filename))
45    else {
46        return ControlFlow::Break(None);
47    };
48
49    if components.peek().is_none() {
50        ControlFlow::Break(Some(entry))
51    } else {
52        ControlFlow::Continue(entry.oid.to_owned())
53    }
54}
55
56impl<'a> TreeRefIter<'a> {
57    /// Instantiate an iterator from the given tree `data` and `hash_kind`.
58    pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> TreeRefIter<'a> {
59        TreeRefIter { data, hash_kind }
60    }
61
62    /// Follow a sequence of `path` components starting from this instance, and look them up in `odb` one by one using `buffer`
63    /// until the last component is looked up and its tree entry is returned.
64    ///
65    /// # Performance Notes
66    ///
67    /// Searching tree entries is currently done in sequence, which allows the search to be allocation free. It would be possible
68    /// to reuse a vector and use a binary search instead, which might be able to improve performance over all.
69    /// However, a benchmark should be created first to have some data and see which trade-off to choose here.
70    pub fn lookup_entry<I, P>(
71        &self,
72        odb: impl crate::Find,
73        buffer: &'a mut Vec<u8>,
74        path: I,
75    ) -> Result<Option<tree::Entry>, crate::find::Error>
76    where
77        I: IntoIterator<Item = P>,
78        P: PartialEq<BStr>,
79    {
80        buffer.clear();
81        buffer.extend_from_slice(self.data);
82
83        let mut iter = path.into_iter().peekable();
84        let mut data = crate::Data::new(buffer, crate::Kind::Tree, self.hash_kind);
85
86        loop {
87            data = match next_entry(&mut iter, data) {
88                ControlFlow::Continue(oid) => {
89                    let Some(next_tree) = odb.try_find(&oid, buffer)? else {
90                        break Ok(None);
91                    };
92                    next_tree
93                }
94                ControlFlow::Break(v) => break Ok(v.map(Into::into)),
95            }
96        }
97    }
98
99    /// Like [`Self::lookup_entry()`], but takes any [`AsRef<Path>`](`std::path::Path`) directly via `relative_path`,
100    /// a path relative to this tree.
101    /// `odb` and `buffer` are used to lookup intermediate trees.
102    ///
103    /// # Note
104    ///
105    /// If any path component contains illformed UTF-8 and thus can't be converted to bytes on platforms which can't do so natively,
106    /// the returned component will be empty which makes the lookup fail.
107    pub fn lookup_entry_by_path(
108        &self,
109        odb: impl crate::Find,
110        buffer: &'a mut Vec<u8>,
111        relative_path: impl AsRef<std::path::Path>,
112    ) -> Result<Option<tree::Entry>, crate::find::Error> {
113        self.lookup_entry(
114            odb,
115            buffer,
116            relative_path
117                .as_ref()
118                .components()
119                .map(|c| c.as_os_str().as_encoded_bytes()),
120        )
121    }
122}
123
124impl<'a> TreeRef<'a> {
125    /// Deserialize a Tree from `data`, assuming `hash_kind` to determine how the object ids are encoded in this particular tree.
126    pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> Result<TreeRef<'a>, crate::decode::Error> {
127        decode::tree(data, hash_kind.len_in_bytes())
128    }
129
130    /// Find an entry named `name` knowing if the entry is a directory or not, using a binary search.
131    ///
132    /// Note that it's impossible to binary search by name alone as the sort order is special.
133    pub fn bisect_entry(&self, name: &BStr, is_dir: bool) -> Option<EntryRef<'a>> {
134        static NULL_HASH: gix_hash::ObjectId = gix_hash::Kind::shortest().null();
135
136        let search = EntryRef {
137            mode: if is_dir {
138                tree::EntryKind::Tree
139            } else {
140                tree::EntryKind::Blob
141            }
142            .into(),
143            filename: name,
144            oid: &NULL_HASH,
145        };
146        self.entries
147            .binary_search_by(|e| e.cmp(&search))
148            .ok()
149            .map(|idx| self.entries[idx])
150    }
151
152    /// Create an instance of the empty tree.
153    ///
154    /// It's particularly useful as static part of a program.
155    pub const fn empty() -> TreeRef<'static> {
156        TreeRef { entries: Vec::new() }
157    }
158}
159
160impl<'a> TreeRefIter<'a> {
161    /// Consume self and return all parsed entries.
162    pub fn entries(self) -> Result<Vec<EntryRef<'a>>, crate::decode::Error> {
163        self.collect()
164    }
165
166    /// Return the offset in bytes that our data advanced from `buf`, the original buffer
167    /// to the beginning of the data of the tree.
168    ///
169    /// Then the tree-iteration can be resumed at the entry that would otherwise be returned next.
170    pub fn offset_to_next_entry(&self, buf: &[u8]) -> usize {
171        let before = (*buf).as_ptr();
172        let after = (*self.data).as_ptr();
173
174        debug_assert!(
175            before <= after,
176            "`TreeRefIter::offset_to_next_entry(): {after:?} <= {before:?}) violated"
177        );
178        (after as usize - before as usize) / std::mem::size_of::<u8>()
179    }
180}
181
182impl<'a> Iterator for TreeRefIter<'a> {
183    type Item = Result<EntryRef<'a>, crate::decode::Error>;
184
185    fn next(&mut self) -> Option<Self::Item> {
186        if self.data.is_empty() {
187            return None;
188        }
189        match decode::fast_entry(self.data, self.hash_kind.len_in_bytes()) {
190            Some((data_left, entry)) => {
191                self.data = data_left;
192                Some(Ok(entry))
193            }
194            None => {
195                let failing = self.data;
196                self.data = &[];
197                #[allow(clippy::unit_arg)]
198                Some(Err(crate::decode::Error::with_err(
199                    winnow::error::ErrMode::from_input(&failing),
200                    failing,
201                )))
202            }
203        }
204    }
205}
206
207impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
208    type Error = &'a [u8];
209
210    fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> {
211        tree::EntryMode::from_bytes(mode).ok_or(mode)
212    }
213}
214
215mod decode {
216    use bstr::ByteSlice;
217    use winnow::error::ParserError;
218
219    use crate::{tree, tree::EntryRef, TreeRef};
220
221    pub fn fast_entry(i: &[u8], hash_len: usize) -> Option<(&[u8], EntryRef<'_>)> {
222        let (mode, i) = tree::EntryMode::extract_from_bytes(i)?;
223        let (filename, i) = i.split_at(i.find_byte(0)?);
224        let i = &i[1..];
225        let (oid, i) = match i.len() {
226            len if len < hash_len => return None,
227            _ => i.split_at(hash_len),
228        };
229        Some((
230            i,
231            EntryRef {
232                mode,
233                filename: filename.as_bstr(),
234                oid: gix_hash::oid::try_from_bytes(oid)
235                    .unwrap_or_else(|_| panic!("we counted exactly {hash_len} bytes")),
236            },
237        ))
238    }
239
240    pub fn tree(data: &[u8], hash_len: usize) -> Result<TreeRef<'_>, crate::decode::Error> {
241        let mut i = data;
242
243        // Calculate an estimate of the amount of entries to reduce
244        // the amount of allocations necessary.
245        // Note that this assumes that we want speed over fitting Vecs, this is a trade-off.
246        const AVERAGE_FILENAME_LEN: usize = 24;
247        const AVERAGE_MODE_LEN: usize = 6;
248        const ENTRY_DELIMITER_LEN: usize = 2; // space + trailing zero
249        const AVERAGE_TREE_ENTRIES: usize = 16 * 2; // prevent overallocation beyond what's meaningful or what could be dangerous
250        let average_entry_len = ENTRY_DELIMITER_LEN + hash_len + AVERAGE_MODE_LEN + AVERAGE_FILENAME_LEN;
251        let upper_bound = i.len() / average_entry_len;
252        let mut out = Vec::with_capacity(upper_bound.min(AVERAGE_TREE_ENTRIES));
253
254        while !i.is_empty() {
255            let Some((rest, entry)) = fast_entry(i, hash_len) else {
256                #[allow(clippy::unit_arg)]
257                return Err(crate::decode::Error::with_err(
258                    winnow::error::ErrMode::from_input(&i),
259                    i,
260                ));
261            };
262            i = rest;
263            out.push(entry);
264        }
265        Ok(TreeRef { entries: out })
266    }
267}