gix_object/tree/
ref_iter.rs

1use bstr::BStr;
2use winnow::{error::ParserError, prelude::*};
3
4use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter};
5
6impl<'a> TreeRefIter<'a> {
7    /// Instantiate an iterator from the given tree data.
8    pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> {
9        TreeRefIter { data }
10    }
11
12    /// Follow a sequence of `path` components starting from this instance, and look them up in `odb` one by one using `buffer`
13    /// until the last component is looked up and its tree entry is returned.
14    ///
15    /// # Performance Notes
16    ///
17    /// Searching tree entries is currently done in sequence, which allows the search to be allocation free. It would be possible
18    /// to reuse a vector and use a binary search instead, which might be able to improve performance over all.
19    /// However, a benchmark should be created first to have some data and see which trade-off to choose here.
20    pub fn lookup_entry<I, P>(
21        &self,
22        odb: impl crate::Find,
23        buffer: &'a mut Vec<u8>,
24        path: I,
25    ) -> Result<Option<tree::Entry>, crate::find::Error>
26    where
27        I: IntoIterator<Item = P>,
28        P: PartialEq<BStr>,
29    {
30        buffer.clear();
31
32        let mut path = path.into_iter().peekable();
33        buffer.extend_from_slice(self.data);
34        while let Some(component) = path.next() {
35            match TreeRefIter::from_bytes(buffer)
36                .filter_map(Result::ok)
37                .find(|entry| component.eq(entry.filename))
38            {
39                Some(entry) => {
40                    if path.peek().is_none() {
41                        return Ok(Some(entry.into()));
42                    } else {
43                        let next_id = entry.oid.to_owned();
44                        let obj = odb.try_find(&next_id, buffer)?;
45                        let Some(obj) = obj else { return Ok(None) };
46                        if !obj.kind.is_tree() {
47                            return Ok(None);
48                        }
49                    }
50                }
51                None => return Ok(None),
52            }
53        }
54        Ok(None)
55    }
56
57    /// Like [`Self::lookup_entry()`], but takes any [`AsRef<Path>`](`std::path::Path`) directly via `relative_path`,
58    /// a path relative to this tree.
59    /// `odb` and `buffer` are used to lookup intermediate trees.
60    ///
61    /// # Note
62    ///
63    /// If any path component contains illformed UTF-8 and thus can't be converted to bytes on platforms which can't do so natively,
64    /// the returned component will be empty which makes the lookup fail.
65    pub fn lookup_entry_by_path(
66        &self,
67        odb: impl crate::Find,
68        buffer: &'a mut Vec<u8>,
69        relative_path: impl AsRef<std::path::Path>,
70    ) -> Result<Option<tree::Entry>, crate::find::Error> {
71        use crate::bstr::ByteSlice;
72        self.lookup_entry(
73            odb,
74            buffer,
75            relative_path.as_ref().components().map(|c: std::path::Component<'_>| {
76                gix_path::os_str_into_bstr(c.as_os_str())
77                    .unwrap_or_else(|_| "".into())
78                    .as_bytes()
79            }),
80        )
81    }
82}
83
84impl<'a> TreeRef<'a> {
85    /// Deserialize a Tree from `data`.
86    pub fn from_bytes(mut data: &'a [u8]) -> Result<TreeRef<'a>, crate::decode::Error> {
87        let input = &mut data;
88        match decode::tree.parse_next(input) {
89            Ok(tag) => Ok(tag),
90            Err(err) => Err(crate::decode::Error::with_err(err, input)),
91        }
92    }
93
94    /// Find an entry named `name` knowing if the entry is a directory or not, using a binary search.
95    ///
96    /// Note that it's impossible to binary search by name alone as the sort order is special.
97    pub fn bisect_entry(&self, name: &BStr, is_dir: bool) -> Option<EntryRef<'a>> {
98        static NULL_HASH: gix_hash::ObjectId = gix_hash::Kind::shortest().null();
99
100        let search = EntryRef {
101            mode: if is_dir {
102                tree::EntryKind::Tree
103            } else {
104                tree::EntryKind::Blob
105            }
106            .into(),
107            filename: name,
108            oid: &NULL_HASH,
109        };
110        self.entries
111            .binary_search_by(|e| e.cmp(&search))
112            .ok()
113            .map(|idx| self.entries[idx])
114    }
115
116    /// Create an instance of the empty tree.
117    ///
118    /// It's particularly useful as static part of a program.
119    pub const fn empty() -> TreeRef<'static> {
120        TreeRef { entries: Vec::new() }
121    }
122}
123
124impl<'a> TreeRefIter<'a> {
125    /// Consume self and return all parsed entries.
126    pub fn entries(self) -> Result<Vec<EntryRef<'a>>, crate::decode::Error> {
127        self.collect()
128    }
129
130    /// Return the offset in bytes that our data advanced from `buf`, the original buffer
131    /// to the beginning of the data of the tree.
132    ///
133    /// Then the tree-iteration can be resumed at the entry that would otherwise be returned next.
134    pub fn offset_to_next_entry(&self, buf: &[u8]) -> usize {
135        let before = (*buf).as_ptr();
136        let after = (*self.data).as_ptr();
137
138        debug_assert!(
139            before <= after,
140            "`TreeRefIter::offset_to_next_entry(): {after:?} <= {before:?}) violated"
141        );
142        (after as usize - before as usize) / std::mem::size_of::<u8>()
143    }
144}
145
146impl<'a> Iterator for TreeRefIter<'a> {
147    type Item = Result<EntryRef<'a>, crate::decode::Error>;
148
149    fn next(&mut self) -> Option<Self::Item> {
150        if self.data.is_empty() {
151            return None;
152        }
153        match decode::fast_entry(self.data) {
154            Some((data_left, entry)) => {
155                self.data = data_left;
156                Some(Ok(entry))
157            }
158            None => {
159                let failing = self.data;
160                self.data = &[];
161                #[allow(clippy::unit_arg)]
162                Some(Err(crate::decode::Error::with_err(
163                    winnow::error::ErrMode::from_input(&failing),
164                    failing,
165                )))
166            }
167        }
168    }
169}
170
171impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
172    type Error = &'a [u8];
173
174    fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> {
175        tree::EntryMode::from_bytes(mode).ok_or(mode)
176    }
177}
178
179mod decode {
180    use bstr::ByteSlice;
181    use winnow::{error::ParserError, prelude::*};
182
183    use crate::{tree, tree::EntryRef, TreeRef};
184
185    pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> {
186        let (mode, i) = tree::EntryMode::extract_from_bytes(i)?;
187        let (filename, i) = i.split_at(i.find_byte(0)?);
188        let i = &i[1..];
189        const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot
190        let (oid, i) = match i.len() {
191            len if len < HASH_LEN_FIXME => return None,
192            _ => i.split_at(20),
193        };
194        Some((
195            i,
196            EntryRef {
197                mode,
198                filename: filename.as_bstr(),
199                oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"),
200            },
201        ))
202    }
203
204    pub fn tree<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<TreeRef<'a>, E> {
205        let mut out = Vec::new();
206        let mut i = &**i;
207        while !i.is_empty() {
208            let Some((rest, entry)) = fast_entry(i) else {
209                #[allow(clippy::unit_arg)]
210                return Err(winnow::error::ErrMode::from_input(&i));
211            };
212            i = rest;
213            out.push(entry);
214        }
215        Ok(TreeRef { entries: out })
216    }
217}