Skip to main content

gix_object/tree/
mod.rs

1use std::{cell::RefCell, cmp::Ordering};
2
3use crate::{
4    Tree, TreeRef,
5    bstr::{BStr, BString},
6    tree,
7};
8
9///
10pub mod editor;
11
12mod ref_iter;
13pub use ref_iter::next_entry;
14
15///
16pub mod write;
17
18/// The state needed to apply edits instantly to in-memory trees.
19///
20/// It's made so that each tree is looked at in the object database at most once, and held in memory for
21/// all edits until everything is flushed to write all changed trees.
22///
23/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
24/// with some penalties.
25#[doc(alias = "TreeUpdateBuilder", alias = "git2")]
26#[derive(Clone)]
27pub struct Editor<'a> {
28    /// A way to lookup trees.
29    find: &'a dyn crate::FindExt,
30    /// The kind of hashes to produce>
31    object_hash: gix_hash::Kind,
32    /// All trees we currently hold in memory. Each of these may change while adding and removing entries.
33    /// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
34    /// dropped when writing at the latest.
35    trees: std::collections::HashMap<BString, Tree>,
36    /// A buffer to build up paths when finding the tree to edit.
37    path_buf: RefCell<BString>,
38    /// Our buffer for storing tree-data in, right before decoding it.
39    tree_buf: Vec<u8>,
40}
41
42/// The mode of items storable in a tree, similar to the file mode on a unix file system.
43///
44/// Used in [`mutable::Entry`][crate::tree::Entry] and [`EntryRef`].
45///
46/// Note that even though it can be created from any `u16`, it should be preferable to
47/// create it by converting [`EntryKind`] into `EntryMode`.
48#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
49#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
50pub struct EntryMode {
51    // Represents the value read from Git, except that "040000" is represented with 0o140000 but
52    // "40000" is represented with 0o40000.
53    internal: u16,
54}
55
56impl TryFrom<u32> for tree::EntryMode {
57    type Error = u32;
58    fn try_from(mode: u32) -> Result<Self, Self::Error> {
59        Ok(match mode {
60            0o40000 | 0o120000 | 0o160000 => EntryMode { internal: mode as u16 },
61            blob_mode if blob_mode & 0o100000 == 0o100000 => EntryMode { internal: mode as u16 },
62            _ => return Err(mode),
63        })
64    }
65}
66
67impl EntryMode {
68    /// Expose the value as u16 (lossy, unlike the internal representation that is hidden).
69    pub const fn value(self) -> u16 {
70        // Demangle the hack: In the case where the second leftmost octet is 4 (Tree), the leftmost bit is
71        // there to represent whether the bytes representation should have 5 or 6 octets.
72        if self.internal & IFMT == 0o140000 {
73            0o040000
74        } else {
75            self.internal
76        }
77    }
78
79    /// Return the representation as used in the git internal format, which is octal and written
80    /// to the `backing` buffer. The respective sub-slice that was written to is returned.
81    pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
82        if self.internal == 0 {
83            std::slice::from_ref(&b'0')
84        } else {
85            for (idx, backing_octet) in backing.iter_mut().enumerate() {
86                let bit_pos = 3 /* because base 8 and 2^3 == 8*/ * (6 - idx - 1);
87                let oct_mask = 0b111 << bit_pos;
88                let digit = (self.internal & oct_mask) >> bit_pos;
89                *backing_octet = b'0' + digit as u8;
90            }
91            // Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
92            if backing[1] == b'4' {
93                if backing[0] == b'1' {
94                    backing[0] = b'0';
95                    &backing[0..6]
96                } else {
97                    &backing[1..6]
98                }
99            } else {
100                &backing[0..6]
101            }
102        }
103        .into()
104    }
105
106    /// Construct an EntryMode from bytes represented as in the git internal format
107    /// Return the mode and the remainder of the bytes.
108    pub(crate) fn extract_from_bytes(i: &[u8]) -> Option<(Self, &'_ [u8])> {
109        let mut mode = 0;
110        if i.is_empty() {
111            return None;
112        }
113
114        // Happy path: space is at index 6
115        let space_pos = if i.get(6) == Some(&b' ') && i.get(5) != Some(&b' ') {
116            for b in i.iter().take(6) {
117                let b = b.wrapping_sub(b'0') as u16;
118                // Not a pure octal input.
119                // Performance matters here, so `!(b'0'..=b'7').contains(&b)` won't do.
120                if b > 7 {
121                    return None;
122                }
123                mode = (mode << 3) + b;
124            }
125            6
126        }
127        // Space is not at index 6, we must find it.
128        else {
129            let mut idx = 0;
130            let mut space_pos = 0;
131
132            // const fn, this is why we can't have nice things (like `.iter().any()`).
133            while idx < i.len() {
134                let b = i[idx].wrapping_sub(b'0') as u16;
135                // Delimiter, return what we got
136                if b == b' '.wrapping_sub(b'0') as u16 {
137                    space_pos = idx;
138                    break;
139                }
140                // Not a pure octal input.
141                // Performance matters here, so `!(b'0'..=b'7').contains(&b)` won't do.
142                if b > 7 {
143                    return None;
144                }
145                // More than 6 octal digits we must have hit the delimiter or the input was malformed.
146                if idx > 6 {
147                    return None;
148                }
149                mode = (mode << 3) + b;
150                idx += 1;
151            }
152
153            space_pos
154        };
155
156        // Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
157        if mode == 0o040000 && i[0] == b'0' {
158            mode += 0o100000;
159        }
160        Some((Self { internal: mode }, &i[(space_pos + 1)..]))
161    }
162
163    /// Construct an EntryMode from bytes represented as in the git internal format.
164    pub fn from_bytes(i: &[u8]) -> Option<Self> {
165        Self::extract_from_bytes(i).map(|(mode, _rest)| mode)
166    }
167}
168
169impl std::fmt::Debug for EntryMode {
170    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
171        write!(f, "EntryMode(0o{})", self.as_bytes(&mut Default::default()))
172    }
173}
174
175impl std::fmt::Octal for EntryMode {
176    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
177        write!(f, "{}", self.as_bytes(&mut Default::default()))
178    }
179}
180
181/// A discretized version of ideal and valid values for entry modes.
182///
183/// Note that even though it can represent every valid [mode](EntryMode), it might
184/// lose information due to that as well.
185#[derive(Clone, Copy, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
186#[repr(u16)]
187#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
188pub enum EntryKind {
189    /// A tree, or directory
190    Tree = 0o040000u16,
191    /// A file that is not executable
192    Blob = 0o100644,
193    /// A file that is executable
194    BlobExecutable = 0o100755,
195    /// A symbolic link
196    Link = 0o120000,
197    /// A commit of a git submodule
198    Commit = 0o160000,
199}
200
201impl From<EntryKind> for EntryMode {
202    fn from(value: EntryKind) -> Self {
203        EntryMode { internal: value as u16 }
204    }
205}
206
207impl From<EntryMode> for EntryKind {
208    fn from(value: EntryMode) -> Self {
209        value.kind()
210    }
211}
212
213/// Serialization
214impl EntryKind {
215    /// Return the representation as used in the git internal format.
216    pub fn as_octal_str(&self) -> &'static BStr {
217        use EntryKind::*;
218        let bytes: &[u8] = match self {
219            Tree => b"40000",
220            Blob => b"100644",
221            BlobExecutable => b"100755",
222            Link => b"120000",
223            Commit => b"160000",
224        };
225        bytes.into()
226    }
227}
228
229const IFMT: u16 = 0o170000;
230
231impl EntryMode {
232    /// Discretize the raw mode into an enum with well-known state while dropping unnecessary details.
233    pub const fn kind(&self) -> EntryKind {
234        let etype = self.value() & IFMT;
235        if etype == 0o100000 {
236            if self.value() & 0o000100 == 0o000100 {
237                EntryKind::BlobExecutable
238            } else {
239                EntryKind::Blob
240            }
241        } else if etype == EntryKind::Link as u16 {
242            EntryKind::Link
243        } else if etype == EntryKind::Tree as u16 {
244            EntryKind::Tree
245        } else {
246            EntryKind::Commit
247        }
248    }
249
250    /// Return true if this entry mode represents a Tree/directory
251    pub const fn is_tree(&self) -> bool {
252        self.value() & IFMT == EntryKind::Tree as u16
253    }
254
255    /// Return true if this entry mode represents the commit of a submodule.
256    pub const fn is_commit(&self) -> bool {
257        self.value() & IFMT == EntryKind::Commit as u16
258    }
259
260    /// Return true if this entry mode represents a symbolic link
261    pub const fn is_link(&self) -> bool {
262        self.value() & IFMT == EntryKind::Link as u16
263    }
264
265    /// Return true if this entry mode represents anything BUT Tree/directory
266    pub const fn is_no_tree(&self) -> bool {
267        self.value() & IFMT != EntryKind::Tree as u16
268    }
269
270    /// Return true if the entry is any kind of blob.
271    pub const fn is_blob(&self) -> bool {
272        self.value() & IFMT == 0o100000
273    }
274
275    /// Return true if the entry is an executable blob.
276    pub const fn is_executable(&self) -> bool {
277        matches!(self.kind(), EntryKind::BlobExecutable)
278    }
279
280    /// Return true if the entry is any kind of blob or symlink.
281    pub const fn is_blob_or_symlink(&self) -> bool {
282        matches!(
283            self.kind(),
284            EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link
285        )
286    }
287
288    /// Represent the mode as descriptive string.
289    pub const fn as_str(&self) -> &'static str {
290        use EntryKind::*;
291        match self.kind() {
292            Tree => "tree",
293            Blob => "blob",
294            BlobExecutable => "exe",
295            Link => "link",
296            Commit => "commit",
297        }
298    }
299}
300
301impl TreeRef<'_> {
302    /// Convert this instance into its own version, creating a copy of all data.
303    ///
304    /// This will temporarily allocate an extra copy in memory, so at worst three copies of the tree exist
305    /// at some intermediate point in time. Use [`Self::into_owned()`] to avoid this.
306    pub fn to_owned(&self) -> Tree {
307        self.clone().into()
308    }
309
310    /// Convert this instance into its own version, creating a copy of all data.
311    pub fn into_owned(self) -> Tree {
312        self.into()
313    }
314}
315
316/// An element of a [`TreeRef`][crate::TreeRef::entries].
317#[derive(PartialEq, Eq, Debug, Hash, Clone, Copy)]
318#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
319pub struct EntryRef<'a> {
320    /// The kind of object to which `oid` is pointing.
321    pub mode: tree::EntryMode,
322    /// The name of the file in the parent tree.
323    pub filename: &'a BStr,
324    /// The id of the object representing the entry.
325    // TODO: figure out how these should be called. id or oid? It's inconsistent around the codebase.
326    //       Answer: make it 'id', as in `git2`
327    #[cfg_attr(feature = "serde", serde(borrow))]
328    pub oid: &'a gix_hash::oid,
329}
330
331impl PartialOrd for EntryRef<'_> {
332    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
333        Some(self.cmp(other))
334    }
335}
336
337impl Ord for EntryRef<'_> {
338    fn cmp(&self, b: &Self) -> Ordering {
339        let a = self;
340        let common = a.filename.len().min(b.filename.len());
341        a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
342            let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
343            let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
344            a.cmp(&b)
345        })
346    }
347}
348
349/// An entry in a [`Tree`], similar to an entry in a directory.
350#[derive(PartialEq, Eq, Debug, Hash, Clone)]
351#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
352pub struct Entry {
353    /// The kind of object to which `oid` is pointing to.
354    pub mode: EntryMode,
355    /// The name of the file in the parent tree.
356    pub filename: BString,
357    /// The id of the object representing the entry.
358    pub oid: gix_hash::ObjectId,
359}
360
361impl PartialOrd for Entry {
362    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
363        Some(self.cmp(other))
364    }
365}
366
367impl Ord for Entry {
368    fn cmp(&self, b: &Self) -> Ordering {
369        let a = self;
370        let common = a.filename.len().min(b.filename.len());
371        a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
372            let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
373            let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
374            a.cmp(&b)
375        })
376    }
377}