Skip to main content

objects/object/
tree.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Tree types: entries, structure, and supporting enums.
3
4use std::{fmt, path::Path};
5
6use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
7use sley::{ObjectFormat as GitObjectFormat, ObjectId as GitObjectId};
8
9use super::ContentHash;
10
11const TREE_FORMAT_VERSION: u8 = 2;
12const ENTRY_KIND_BLOB: u8 = 0;
13const ENTRY_KIND_TREE: u8 = 1;
14const ENTRY_KIND_SYMLINK: u8 = 2;
15const ENTRY_KIND_GITLINK: u8 = 3;
16const GIT_OBJECT_FORMAT_SHA1: u8 = 1;
17const GIT_OBJECT_FORMAT_SHA256: u8 = 2;
18
19// ── TreeError ───────────────────────────────────────────────────────
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum TreeError {
23    InvalidName(String),
24    InvalidStructure(String),
25}
26
27impl std::error::Error for TreeError {}
28
29impl fmt::Display for TreeError {
30    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31        match self {
32            TreeError::InvalidName(msg) => write!(f, "invalid tree entry name: {}", msg),
33            TreeError::InvalidStructure(msg) => write!(f, "invalid tree structure: {}", msg),
34        }
35    }
36}
37
38// ── FileMode ────────────────────────────────────────────────────────
39
40#[repr(u8)]
41#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
42pub enum FileMode {
43    Normal,
44    Executable,
45    Symlink,
46    Gitlink,
47}
48
49impl FileMode {
50    pub fn to_byte(&self) -> u8 {
51        match self {
52            FileMode::Normal => 0,
53            FileMode::Executable => 1,
54            FileMode::Symlink => 2,
55            FileMode::Gitlink => 3,
56        }
57    }
58
59    pub fn from_byte(b: u8) -> Option<Self> {
60        match b {
61            0 => Some(FileMode::Normal),
62            1 => Some(FileMode::Executable),
63            2 => Some(FileMode::Symlink),
64            3 => Some(FileMode::Gitlink),
65            _ => None,
66        }
67    }
68
69    pub fn to_unix_mode(&self) -> u32 {
70        match self {
71            FileMode::Normal => 0o100644,
72            FileMode::Executable => 0o100755,
73            FileMode::Symlink => 0o120000,
74            FileMode::Gitlink => 0o160000,
75        }
76    }
77}
78
79// ── EntryType ───────────────────────────────────────────────────────
80
81#[repr(u8)]
82#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
83pub enum EntryType {
84    Blob,
85    Tree,
86    Symlink,
87    Gitlink,
88}
89
90impl EntryType {
91    pub fn to_byte(&self) -> u8 {
92        match self {
93            EntryType::Blob => 0,
94            EntryType::Tree => 1,
95            EntryType::Symlink => 2,
96            EntryType::Gitlink => 3,
97        }
98    }
99
100    pub fn from_byte(b: u8) -> Option<Self> {
101        match b {
102            0 => Some(EntryType::Blob),
103            1 => Some(EntryType::Tree),
104            2 => Some(EntryType::Symlink),
105            3 => Some(EntryType::Gitlink),
106            _ => None,
107        }
108    }
109}
110
111// ── TreeEntryTarget ────────────────────────────────────────────────
112
113#[derive(Clone, Debug, PartialEq, Eq)]
114pub enum TreeEntryTarget {
115    Blob { hash: ContentHash, executable: bool },
116    Tree { hash: ContentHash },
117    Symlink { hash: ContentHash },
118    Gitlink { target: GitObjectId },
119}
120
121impl TreeEntryTarget {
122    pub fn entry_type(&self) -> EntryType {
123        match self {
124            TreeEntryTarget::Blob { .. } => EntryType::Blob,
125            TreeEntryTarget::Tree { .. } => EntryType::Tree,
126            TreeEntryTarget::Symlink { .. } => EntryType::Symlink,
127            TreeEntryTarget::Gitlink { .. } => EntryType::Gitlink,
128        }
129    }
130
131    pub fn mode(&self) -> FileMode {
132        match self {
133            TreeEntryTarget::Blob {
134                executable: true, ..
135            } => FileMode::Executable,
136            TreeEntryTarget::Blob { .. } => FileMode::Normal,
137            TreeEntryTarget::Tree { .. } => FileMode::Normal,
138            TreeEntryTarget::Symlink { .. } => FileMode::Symlink,
139            TreeEntryTarget::Gitlink { .. } => FileMode::Gitlink,
140        }
141    }
142
143    pub fn content_hash(&self) -> Option<ContentHash> {
144        match self {
145            TreeEntryTarget::Blob { hash, .. }
146            | TreeEntryTarget::Tree { hash }
147            | TreeEntryTarget::Symlink { hash } => Some(*hash),
148            TreeEntryTarget::Gitlink { .. } => None,
149        }
150    }
151
152    pub fn gitlink_target(&self) -> Option<GitObjectId> {
153        match self {
154            TreeEntryTarget::Gitlink { target } => Some(*target),
155            _ => None,
156        }
157    }
158
159    fn encoded_payload_len(&self) -> usize {
160        match self {
161            TreeEntryTarget::Blob { hash, .. }
162            | TreeEntryTarget::Tree { hash }
163            | TreeEntryTarget::Symlink { hash } => hash.as_bytes().len(),
164            TreeEntryTarget::Gitlink { target } => target.as_bytes().len(),
165        }
166    }
167
168    fn update_hasher(&self, hasher: &mut blake3::Hasher) {
169        hasher.update(&[self.mode().to_byte()]);
170        hasher.update(&[self.entry_type().to_byte()]);
171        match self {
172            TreeEntryTarget::Blob { hash, .. }
173            | TreeEntryTarget::Tree { hash }
174            | TreeEntryTarget::Symlink { hash } => hasher.update(hash.as_bytes()),
175            TreeEntryTarget::Gitlink { target } => {
176                hasher.update(&[git_format_to_tag(target.format())]);
177                hasher.update(target.as_bytes())
178            }
179        };
180    }
181}
182
183// ── TreeEntry ───────────────────────────────────────────────────────
184
185pub fn validate_name(name: &str) -> Result<(), TreeError> {
186    if name.is_empty() {
187        return Err(TreeError::InvalidName("entry name cannot be empty".into()));
188    }
189    if name == "." || name == ".." {
190        return Err(TreeError::InvalidName(format!(
191            "'{}' is not a valid entry name",
192            name
193        )));
194    }
195    if name.contains('/') || name.contains('\\') {
196        return Err(TreeError::InvalidName(
197            "entry name cannot contain path separators".into(),
198        ));
199    }
200    if name.bytes().any(|b| b < 0x20 || b == 0x7f) {
201        return Err(TreeError::InvalidName(
202            "entry name contains control characters".into(),
203        ));
204    }
205    Ok(())
206}
207
208#[derive(Clone, Debug, PartialEq, Eq)]
209pub struct TreeEntry {
210    name: String,
211    target: TreeEntryTarget,
212}
213
214impl TreeEntry {
215    #[cfg(test)]
216    pub(crate) fn new_unchecked_for_tests(
217        name: impl Into<String>,
218        target: TreeEntryTarget,
219    ) -> Self {
220        Self {
221            name: name.into(),
222            target,
223        }
224    }
225
226    pub(crate) fn validate(&self) -> Result<(), TreeError> {
227        validate_name(&self.name)
228    }
229
230    pub fn file(
231        name: impl Into<String>,
232        hash: ContentHash,
233        executable: bool,
234    ) -> Result<Self, TreeError> {
235        let name = name.into();
236        validate_name(&name)?;
237        Ok(Self {
238            name,
239            target: TreeEntryTarget::Blob { hash, executable },
240        })
241    }
242
243    pub fn directory(name: impl Into<String>, hash: ContentHash) -> Result<Self, TreeError> {
244        let name = name.into();
245        validate_name(&name)?;
246        Ok(Self {
247            name,
248            target: TreeEntryTarget::Tree { hash },
249        })
250    }
251
252    pub fn symlink(name: impl Into<String>, hash: ContentHash) -> Result<Self, TreeError> {
253        let name = name.into();
254        validate_name(&name)?;
255        Ok(Self {
256            name,
257            target: TreeEntryTarget::Symlink { hash },
258        })
259    }
260
261    pub fn gitlink(name: impl Into<String>, target: GitObjectId) -> Result<Self, TreeError> {
262        let name = name.into();
263        validate_name(&name)?;
264        Ok(Self {
265            name,
266            target: TreeEntryTarget::Gitlink { target },
267        })
268    }
269
270    pub fn name(&self) -> &str {
271        &self.name
272    }
273
274    pub fn set_name(&mut self, name: impl Into<String>) -> Result<(), TreeError> {
275        let name = name.into();
276        validate_name(&name)?;
277        self.name = name;
278        Ok(())
279    }
280
281    pub fn with_mode(&self, mode: FileMode) -> Result<Self, TreeError> {
282        match (&self.target, mode) {
283            (TreeEntryTarget::Blob { hash, .. }, FileMode::Normal | FileMode::Executable) => {
284                Self::file(self.name.clone(), *hash, mode == FileMode::Executable)
285            }
286            (TreeEntryTarget::Symlink { .. }, FileMode::Symlink)
287            | (TreeEntryTarget::Tree { .. }, _)
288            | (TreeEntryTarget::Gitlink { .. }, FileMode::Gitlink)
289                if mode == self.mode() =>
290            {
291                Ok(self.clone())
292            }
293            _ => Err(TreeError::InvalidStructure(format!(
294                "cannot apply mode {:?} to {:?} entry '{}'",
295                mode,
296                self.entry_type(),
297                self.name
298            ))),
299        }
300    }
301
302    pub fn target(&self) -> &TreeEntryTarget {
303        &self.target
304    }
305
306    pub fn entry_type(&self) -> EntryType {
307        self.target.entry_type()
308    }
309
310    pub fn mode(&self) -> FileMode {
311        self.target.mode()
312    }
313
314    pub fn content_hash(&self) -> Option<ContentHash> {
315        self.target.content_hash()
316    }
317
318    pub fn leaf_content_hash(&self) -> Option<ContentHash> {
319        match self.target {
320            TreeEntryTarget::Blob { hash, .. } | TreeEntryTarget::Symlink { hash } => Some(hash),
321            TreeEntryTarget::Tree { .. } | TreeEntryTarget::Gitlink { .. } => None,
322        }
323    }
324
325    pub fn require_content_hash(&self) -> ContentHash {
326        self.content_hash()
327            .expect("tree entry target does not carry a Heddle content hash")
328    }
329
330    pub fn blob_hash(&self) -> Option<ContentHash> {
331        match self.target {
332            TreeEntryTarget::Blob { hash, .. } => Some(hash),
333            _ => None,
334        }
335    }
336
337    pub fn tree_hash(&self) -> Option<ContentHash> {
338        match self.target {
339            TreeEntryTarget::Tree { hash } => Some(hash),
340            _ => None,
341        }
342    }
343
344    pub fn symlink_hash(&self) -> Option<ContentHash> {
345        match self.target {
346            TreeEntryTarget::Symlink { hash } => Some(hash),
347            _ => None,
348        }
349    }
350
351    pub fn gitlink_target(&self) -> Option<GitObjectId> {
352        self.target.gitlink_target()
353    }
354
355    pub fn is_tree(&self) -> bool {
356        self.entry_type() == EntryType::Tree
357    }
358
359    pub fn is_blob(&self) -> bool {
360        self.entry_type() == EntryType::Blob
361    }
362
363    pub fn is_symlink(&self) -> bool {
364        self.entry_type() == EntryType::Symlink
365    }
366
367    pub fn is_gitlink(&self) -> bool {
368        self.entry_type() == EntryType::Gitlink
369    }
370
371    pub fn is_executable(&self) -> bool {
372        self.mode() == FileMode::Executable
373    }
374
375    pub(crate) fn encoded_len(&self) -> usize {
376        1 + 1 + self.target.encoded_payload_len() + self.name.len() + 1
377    }
378
379    pub(crate) fn update_hasher(&self, hasher: &mut blake3::Hasher) {
380        self.target.update_hasher(hasher);
381        hasher.update(self.name.as_bytes());
382        hasher.update(&[0]);
383    }
384}
385
386// ── Tree ────────────────────────────────────────────────────────────
387
388#[derive(Clone, Debug, PartialEq, Eq)]
389pub struct Tree {
390    entries: Vec<TreeEntry>,
391}
392
393impl Tree {
394    pub fn new() -> Self {
395        Self {
396            entries: Vec::new(),
397        }
398    }
399
400    pub fn from_entries(mut entries: Vec<TreeEntry>) -> Self {
401        entries.sort_by(|a, b| a.name.cmp(&b.name));
402        Self { entries }
403    }
404
405    #[cfg(test)]
406    pub(crate) fn from_entries_unchecked_for_tests(entries: Vec<TreeEntry>) -> Self {
407        Self { entries }
408    }
409
410    pub fn validate(&self) -> Result<(), TreeError> {
411        let mut previous_name: Option<&str> = None;
412        for entry in &self.entries {
413            entry.validate()?;
414            if let Some(previous) = previous_name
415                && previous >= entry.name.as_str()
416            {
417                return Err(TreeError::InvalidStructure(
418                    "entries must be strictly sorted by name".to_string(),
419                ));
420            }
421            previous_name = Some(&entry.name);
422        }
423        Ok(())
424    }
425
426    pub fn entries(&self) -> &[TreeEntry] {
427        &self.entries
428    }
429
430    pub fn get(&self, name: &str) -> Option<&TreeEntry> {
431        let index = self
432            .entries
433            .binary_search_by(|entry| entry.name.as_str().cmp(name))
434            .ok()?;
435        self.entries.get(index)
436    }
437
438    pub fn insert(&mut self, entry: TreeEntry) {
439        self.entries.retain(|e| e.name != entry.name);
440        let pos = self
441            .entries
442            .iter()
443            .position(|e| e.name > entry.name)
444            .unwrap_or(self.entries.len());
445        self.entries.insert(pos, entry);
446    }
447
448    pub fn remove(&mut self, name: &str) -> Option<TreeEntry> {
449        let pos = self.entries.iter().position(|e| e.name == name)?;
450        Some(self.entries.remove(pos))
451    }
452
453    pub fn is_empty(&self) -> bool {
454        self.entries.is_empty()
455    }
456
457    pub fn len(&self) -> usize {
458        self.entries.len()
459    }
460
461    pub fn hash(&self) -> ContentHash {
462        let total_len: usize = self.entries.iter().map(TreeEntry::encoded_len).sum();
463        ContentHash::compute_typed_with_len("tree", total_len as u64, |hasher| {
464            for entry in &self.entries {
465                entry.update_hasher(hasher);
466            }
467        })
468    }
469
470    pub fn iter(&self) -> impl Iterator<Item = &TreeEntry> {
471        self.entries.iter()
472    }
473
474    pub fn get_path(&self, path: &Path) -> Option<&TreeEntry> {
475        let name = path.file_name()?.to_str()?;
476        if path.parent().is_none_or(|p| p.as_os_str().is_empty()) {
477            self.get(name)
478        } else {
479            None
480        }
481    }
482}
483
484// ── Durable V2 tree encoding ───────────────────────────────────────
485
486#[derive(Serialize, Deserialize)]
487struct EncodedTreeV2 {
488    version: u8,
489    entries: Vec<EncodedTreeEntryV2>,
490}
491
492#[derive(Serialize, Deserialize)]
493struct EncodedTreeEntryV2 {
494    name: String,
495    kind: u8,
496    hash: Option<ContentHash>,
497    executable: Option<bool>,
498    git_format: Option<u8>,
499    git_oid: Option<Vec<u8>>,
500}
501
502impl Serialize for Tree {
503    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
504    where
505        S: Serializer,
506    {
507        EncodedTreeV2::from(self).serialize(serializer)
508    }
509}
510
511impl<'de> Deserialize<'de> for Tree {
512    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
513    where
514        D: Deserializer<'de>,
515    {
516        let encoded = EncodedTreeV2::deserialize(deserializer)?;
517        Tree::try_from(encoded).map_err(de::Error::custom)
518    }
519}
520
521#[derive(Debug)]
522pub(crate) enum TreeDecodeError {
523    Decode(rmp_serde::decode::Error),
524    Invalid(TreeError),
525}
526
527impl From<rmp_serde::decode::Error> for TreeDecodeError {
528    fn from(error: rmp_serde::decode::Error) -> Self {
529        Self::Decode(error)
530    }
531}
532
533impl From<TreeError> for TreeDecodeError {
534    fn from(error: TreeError) -> Self {
535        Self::Invalid(error)
536    }
537}
538
539impl From<&Tree> for EncodedTreeV2 {
540    fn from(tree: &Tree) -> Self {
541        Self {
542            version: TREE_FORMAT_VERSION,
543            entries: tree.entries.iter().map(EncodedTreeEntryV2::from).collect(),
544        }
545    }
546}
547
548impl From<&TreeEntry> for EncodedTreeEntryV2 {
549    fn from(entry: &TreeEntry) -> Self {
550        match entry.target() {
551            TreeEntryTarget::Blob { hash, executable } => Self {
552                name: entry.name.clone(),
553                kind: ENTRY_KIND_BLOB,
554                hash: Some(*hash),
555                executable: Some(*executable),
556                git_format: None,
557                git_oid: None,
558            },
559            TreeEntryTarget::Tree { hash } => Self {
560                name: entry.name.clone(),
561                kind: ENTRY_KIND_TREE,
562                hash: Some(*hash),
563                executable: None,
564                git_format: None,
565                git_oid: None,
566            },
567            TreeEntryTarget::Symlink { hash } => Self {
568                name: entry.name.clone(),
569                kind: ENTRY_KIND_SYMLINK,
570                hash: Some(*hash),
571                executable: None,
572                git_format: None,
573                git_oid: None,
574            },
575            TreeEntryTarget::Gitlink { target } => Self {
576                name: entry.name.clone(),
577                kind: ENTRY_KIND_GITLINK,
578                hash: None,
579                executable: None,
580                git_format: Some(git_format_to_tag(target.format())),
581                git_oid: Some(target.as_bytes().to_vec()),
582            },
583        }
584    }
585}
586
587impl TryFrom<EncodedTreeV2> for Tree {
588    type Error = TreeError;
589
590    fn try_from(encoded: EncodedTreeV2) -> Result<Self, Self::Error> {
591        if encoded.version != TREE_FORMAT_VERSION {
592            return Err(TreeError::InvalidStructure(format!(
593                "unsupported tree format version {}; this binary writes {}",
594                encoded.version, TREE_FORMAT_VERSION
595            )));
596        }
597        let mut entries = Vec::with_capacity(encoded.entries.len());
598        for entry in encoded.entries {
599            entries.push(TreeEntry::try_from(entry)?);
600        }
601        let tree = Tree::from_entries(entries);
602        tree.validate()?;
603        Ok(tree)
604    }
605}
606
607impl Tree {
608    pub(crate) fn decode_current_msgpack(data: &[u8]) -> Result<Self, TreeDecodeError> {
609        let encoded: EncodedTreeV2 = rmp_serde::from_slice(data)?;
610        Ok(Tree::try_from(encoded)?)
611    }
612}
613
614impl TryFrom<EncodedTreeEntryV2> for TreeEntry {
615    type Error = TreeError;
616
617    fn try_from(encoded: EncodedTreeEntryV2) -> Result<Self, Self::Error> {
618        match encoded.kind {
619            ENTRY_KIND_BLOB => TreeEntry::file(
620                encoded.name,
621                required_hash(encoded.hash, ENTRY_KIND_BLOB)?,
622                encoded.executable.unwrap_or(false),
623            ),
624            ENTRY_KIND_TREE => {
625                TreeEntry::directory(encoded.name, required_hash(encoded.hash, ENTRY_KIND_TREE)?)
626            }
627            ENTRY_KIND_SYMLINK => TreeEntry::symlink(
628                encoded.name,
629                required_hash(encoded.hash, ENTRY_KIND_SYMLINK)?,
630            ),
631            ENTRY_KIND_GITLINK => {
632                let format = git_format_from_tag(required_git_format(
633                    encoded.git_format,
634                    ENTRY_KIND_GITLINK,
635                )?)?;
636                let oid = encoded.git_oid.ok_or_else(|| {
637                    TreeError::InvalidStructure("gitlink entry is missing git_oid".into())
638                })?;
639                let target = GitObjectId::from_raw(format, &oid).map_err(|err| {
640                    TreeError::InvalidStructure(format!("invalid gitlink target: {err}"))
641                })?;
642                TreeEntry::gitlink(encoded.name, target)
643            }
644            other => Err(TreeError::InvalidStructure(format!(
645                "unknown tree entry kind {other}"
646            ))),
647        }
648    }
649}
650
651fn required_hash(hash: Option<ContentHash>, kind: u8) -> Result<ContentHash, TreeError> {
652    hash.ok_or_else(|| TreeError::InvalidStructure(format!("entry kind {kind} is missing hash")))
653}
654
655fn required_git_format(format: Option<u8>, kind: u8) -> Result<u8, TreeError> {
656    format.ok_or_else(|| {
657        TreeError::InvalidStructure(format!("entry kind {kind} is missing git_format"))
658    })
659}
660
661fn git_format_to_tag(format: GitObjectFormat) -> u8 {
662    match format {
663        GitObjectFormat::Sha1 => GIT_OBJECT_FORMAT_SHA1,
664        GitObjectFormat::Sha256 => GIT_OBJECT_FORMAT_SHA256,
665    }
666}
667
668fn git_format_from_tag(tag: u8) -> Result<GitObjectFormat, TreeError> {
669    match tag {
670        GIT_OBJECT_FORMAT_SHA1 => Ok(GitObjectFormat::Sha1),
671        GIT_OBJECT_FORMAT_SHA256 => Ok(GitObjectFormat::Sha256),
672        other => Err(TreeError::InvalidStructure(format!(
673            "unknown git object format tag {other}"
674        ))),
675    }
676}
677
678impl Default for Tree {
679    fn default() -> Self {
680        Self::new()
681    }
682}
683
684impl IntoIterator for Tree {
685    type Item = TreeEntry;
686    type IntoIter = std::vec::IntoIter<TreeEntry>;
687
688    fn into_iter(self) -> Self::IntoIter {
689        self.entries.into_iter()
690    }
691}
692
693impl<'a> IntoIterator for &'a Tree {
694    type Item = &'a TreeEntry;
695    type IntoIter = std::slice::Iter<'a, TreeEntry>;
696
697    fn into_iter(self) -> Self::IntoIter {
698        self.entries.iter()
699    }
700}