Skip to main content

gix_object/tree/
mod.rs

1use std::{cell::RefCell, cmp::Ordering};
2
3use crate::{
4    bstr::{BStr, BString},
5    tree, Tree, TreeRef,
6};
7
8///
9pub mod editor;
10
11mod ref_iter;
12pub use ref_iter::next_entry;
13
14///
15pub mod write;
16
17/// The state needed to apply edits instantly to in-memory trees.
18///
19/// It's made so that each tree is looked at in the object database at most once, and held in memory for
20/// all edits until everything is flushed to write all changed trees.
21///
22/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
23/// with some penalties.
24#[doc(alias = "TreeUpdateBuilder", alias = "git2")]
25#[derive(Clone)]
26pub struct Editor<'a> {
27    /// A way to lookup trees.
28    find: &'a dyn crate::FindExt,
29    /// The kind of hashes to produce>
30    object_hash: gix_hash::Kind,
31    /// All trees we currently hold in memory. Each of these may change while adding and removing entries.
32    /// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
33    /// dropped when writing at the latest.
34    trees: std::collections::HashMap<BString, Tree>,
35    /// A buffer to build up paths when finding the tree to edit.
36    path_buf: RefCell<BString>,
37    /// Our buffer for storing tree-data in, right before decoding it.
38    tree_buf: Vec<u8>,
39}
40
41/// The mode of items storable in a tree, similar to the file mode on a unix file system.
42///
43/// Used in [`mutable::Entry`][crate::tree::Entry] and [`EntryRef`].
44///
45/// Note that even though it can be created from any `u16`, it should be preferable to
46/// create it by converting [`EntryKind`] into `EntryMode`.
47#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
48#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
49pub struct EntryMode {
50    // Represents the value read from Git, except that "040000" is represented with 0o140000 but
51    // "40000" is represented with 0o40000.
52    internal: u16,
53}
54
55impl TryFrom<u32> for tree::EntryMode {
56    type Error = u32;
57    fn try_from(mode: u32) -> Result<Self, Self::Error> {
58        Ok(match mode {
59            0o40000 | 0o120000 | 0o160000 => EntryMode { internal: mode as u16 },
60            blob_mode if blob_mode & 0o100000 == 0o100000 => EntryMode { internal: mode as u16 },
61            _ => return Err(mode),
62        })
63    }
64}
65
66impl EntryMode {
67    /// Expose the value as u16 (lossy, unlike the internal representation that is hidden).
68    pub const fn value(self) -> u16 {
69        // Demangle the hack: In the case where the second leftmost octet is 4 (Tree), the leftmost bit is
70        // there to represent whether the bytes representation should have 5 or 6 octets.
71        if self.internal & IFMT == 0o140000 {
72            0o040000
73        } else {
74            self.internal
75        }
76    }
77
78    /// Return the representation as used in the git internal format, which is octal and written
79    /// to the `backing` buffer. The respective sub-slice that was written to is returned.
80    pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
81        if self.internal == 0 {
82            std::slice::from_ref(&b'0')
83        } else {
84            for (idx, backing_octet) in backing.iter_mut().enumerate() {
85                let bit_pos = 3 /* because base 8 and 2^3 == 8*/ * (6 - idx - 1);
86                let oct_mask = 0b111 << bit_pos;
87                let digit = (self.internal & oct_mask) >> bit_pos;
88                *backing_octet = b'0' + digit as u8;
89            }
90            // Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
91            if backing[1] == b'4' {
92                if backing[0] == b'1' {
93                    backing[0] = b'0';
94                    &backing[0..6]
95                } else {
96                    &backing[1..6]
97                }
98            } else {
99                &backing[0..6]
100            }
101        }
102        .into()
103    }
104
105    /// Construct an EntryMode from bytes represented as in the git internal format
106    /// Return the mode and the remainder of the bytes.
107    pub(crate) fn extract_from_bytes(i: &[u8]) -> Option<(Self, &'_ [u8])> {
108        let mut mode = 0;
109        if i.is_empty() {
110            return None;
111        }
112
113        // Happy path: space is at index 6
114        let space_pos = if i.get(6) == Some(&b' ') && i.get(5) != Some(&b' ') {
115            for b in i.iter().take(6) {
116                let b = b.wrapping_sub(b'0') as u16;
117                // Not a pure octal input.
118                // Performance matters here, so `!(b'0'..=b'7').contains(&b)` won't do.
119                if b > 7 {
120                    return None;
121                }
122                mode = (mode << 3) + b;
123            }
124            6
125        }
126        // Space is not at index 6, we must find it.
127        else {
128            let mut idx = 0;
129            let mut space_pos = 0;
130
131            // const fn, this is why we can't have nice things (like `.iter().any()`).
132            while idx < i.len() {
133                let b = i[idx].wrapping_sub(b'0') as u16;
134                // Delimiter, return what we got
135                if b == b' '.wrapping_sub(b'0') as u16 {
136                    space_pos = idx;
137                    break;
138                }
139                // Not a pure octal input.
140                // Performance matters here, so `!(b'0'..=b'7').contains(&b)` won't do.
141                if b > 7 {
142                    return None;
143                }
144                // More than 6 octal digits we must have hit the delimiter or the input was malformed.
145                if idx > 6 {
146                    return None;
147                }
148                mode = (mode << 3) + b;
149                idx += 1;
150            }
151
152            space_pos
153        };
154
155        // Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
156        if mode == 0o040000 && i[0] == b'0' {
157            mode += 0o100000;
158        }
159        Some((Self { internal: mode }, &i[(space_pos + 1)..]))
160    }
161
162    /// Construct an EntryMode from bytes represented as in the git internal format.
163    pub fn from_bytes(i: &[u8]) -> Option<Self> {
164        Self::extract_from_bytes(i).map(|(mode, _rest)| mode)
165    }
166}
167
168impl std::fmt::Debug for EntryMode {
169    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170        write!(f, "EntryMode(0o{})", self.as_bytes(&mut Default::default()))
171    }
172}
173
174impl std::fmt::Octal for EntryMode {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        write!(f, "{}", self.as_bytes(&mut Default::default()))
177    }
178}
179
180/// A discretized version of ideal and valid values for entry modes.
181///
182/// Note that even though it can represent every valid [mode](EntryMode), it might
183/// lose information due to that as well.
184#[derive(Clone, Copy, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
185#[repr(u16)]
186#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
187pub enum EntryKind {
188    /// A tree, or directory
189    Tree = 0o040000u16,
190    /// A file that is not executable
191    Blob = 0o100644,
192    /// A file that is executable
193    BlobExecutable = 0o100755,
194    /// A symbolic link
195    Link = 0o120000,
196    /// A commit of a git submodule
197    Commit = 0o160000,
198}
199
200impl From<EntryKind> for EntryMode {
201    fn from(value: EntryKind) -> Self {
202        EntryMode { internal: value as u16 }
203    }
204}
205
206impl From<EntryMode> for EntryKind {
207    fn from(value: EntryMode) -> Self {
208        value.kind()
209    }
210}
211
212/// Serialization
213impl EntryKind {
214    /// Return the representation as used in the git internal format.
215    pub fn as_octal_str(&self) -> &'static BStr {
216        use EntryKind::*;
217        let bytes: &[u8] = match self {
218            Tree => b"40000",
219            Blob => b"100644",
220            BlobExecutable => b"100755",
221            Link => b"120000",
222            Commit => b"160000",
223        };
224        bytes.into()
225    }
226}
227
228const IFMT: u16 = 0o170000;
229
230impl EntryMode {
231    /// Discretize the raw mode into an enum with well-known state while dropping unnecessary details.
232    pub const fn kind(&self) -> EntryKind {
233        let etype = self.value() & IFMT;
234        if etype == 0o100000 {
235            if self.value() & 0o000100 == 0o000100 {
236                EntryKind::BlobExecutable
237            } else {
238                EntryKind::Blob
239            }
240        } else if etype == EntryKind::Link as u16 {
241            EntryKind::Link
242        } else if etype == EntryKind::Tree as u16 {
243            EntryKind::Tree
244        } else {
245            EntryKind::Commit
246        }
247    }
248
249    /// Return true if this entry mode represents a Tree/directory
250    pub const fn is_tree(&self) -> bool {
251        self.value() & IFMT == EntryKind::Tree as u16
252    }
253
254    /// Return true if this entry mode represents the commit of a submodule.
255    pub const fn is_commit(&self) -> bool {
256        self.value() & IFMT == EntryKind::Commit as u16
257    }
258
259    /// Return true if this entry mode represents a symbolic link
260    pub const fn is_link(&self) -> bool {
261        self.value() & IFMT == EntryKind::Link as u16
262    }
263
264    /// Return true if this entry mode represents anything BUT Tree/directory
265    pub const fn is_no_tree(&self) -> bool {
266        self.value() & IFMT != EntryKind::Tree as u16
267    }
268
269    /// Return true if the entry is any kind of blob.
270    pub const fn is_blob(&self) -> bool {
271        self.value() & IFMT == 0o100000
272    }
273
274    /// Return true if the entry is an executable blob.
275    pub const fn is_executable(&self) -> bool {
276        matches!(self.kind(), EntryKind::BlobExecutable)
277    }
278
279    /// Return true if the entry is any kind of blob or symlink.
280    pub const fn is_blob_or_symlink(&self) -> bool {
281        matches!(
282            self.kind(),
283            EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link
284        )
285    }
286
287    /// Represent the mode as descriptive string.
288    pub const fn as_str(&self) -> &'static str {
289        use EntryKind::*;
290        match self.kind() {
291            Tree => "tree",
292            Blob => "blob",
293            BlobExecutable => "exe",
294            Link => "link",
295            Commit => "commit",
296        }
297    }
298}
299
300impl TreeRef<'_> {
301    /// Convert this instance into its own version, creating a copy of all data.
302    ///
303    /// This will temporarily allocate an extra copy in memory, so at worst three copies of the tree exist
304    /// at some intermediate point in time. Use [`Self::into_owned()`] to avoid this.
305    pub fn to_owned(&self) -> Tree {
306        self.clone().into()
307    }
308
309    /// Convert this instance into its own version, creating a copy of all data.
310    pub fn into_owned(self) -> Tree {
311        self.into()
312    }
313}
314
315/// An element of a [`TreeRef`][crate::TreeRef::entries].
316#[derive(PartialEq, Eq, Debug, Hash, Clone, Copy)]
317#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
318pub struct EntryRef<'a> {
319    /// The kind of object to which `oid` is pointing.
320    pub mode: tree::EntryMode,
321    /// The name of the file in the parent tree.
322    pub filename: &'a BStr,
323    /// The id of the object representing the entry.
324    // TODO: figure out how these should be called. id or oid? It's inconsistent around the codebase.
325    //       Answer: make it 'id', as in `git2`
326    #[cfg_attr(feature = "serde", serde(borrow))]
327    pub oid: &'a gix_hash::oid,
328}
329
330impl PartialOrd for EntryRef<'_> {
331    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
332        Some(self.cmp(other))
333    }
334}
335
336impl Ord for EntryRef<'_> {
337    fn cmp(&self, b: &Self) -> Ordering {
338        let a = self;
339        let common = a.filename.len().min(b.filename.len());
340        a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
341            let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
342            let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
343            a.cmp(&b)
344        })
345    }
346}
347
348/// An entry in a [`Tree`], similar to an entry in a directory.
349#[derive(PartialEq, Eq, Debug, Hash, Clone)]
350#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
351pub struct Entry {
352    /// The kind of object to which `oid` is pointing to.
353    pub mode: EntryMode,
354    /// The name of the file in the parent tree.
355    pub filename: BString,
356    /// The id of the object representing the entry.
357    pub oid: gix_hash::ObjectId,
358}
359
360impl PartialOrd for Entry {
361    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
362        Some(self.cmp(other))
363    }
364}
365
366impl Ord for Entry {
367    fn cmp(&self, b: &Self) -> Ordering {
368        let a = self;
369        let common = a.filename.len().min(b.filename.len());
370        a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
371            let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
372            let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
373            a.cmp(&b)
374        })
375    }
376}