1use std::{fmt, path::Path};
5
6use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
7use sley::{ObjectFormat as GitObjectFormat, ObjectId as GitObjectId};
8
9use super::ContentHash;
10
11const TREE_FORMAT_VERSION: u8 = 2;
12const ENTRY_KIND_BLOB: u8 = 0;
13const ENTRY_KIND_TREE: u8 = 1;
14const ENTRY_KIND_SYMLINK: u8 = 2;
15const ENTRY_KIND_GITLINK: u8 = 3;
16const GIT_OBJECT_FORMAT_SHA1: u8 = 1;
17const GIT_OBJECT_FORMAT_SHA256: u8 = 2;
18
19#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum TreeError {
23 InvalidName(String),
24 InvalidStructure(String),
25}
26
27impl std::error::Error for TreeError {}
28
29impl fmt::Display for TreeError {
30 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31 match self {
32 TreeError::InvalidName(msg) => write!(f, "invalid tree entry name: {}", msg),
33 TreeError::InvalidStructure(msg) => write!(f, "invalid tree structure: {}", msg),
34 }
35 }
36}
37
38#[repr(u8)]
41#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
42pub enum FileMode {
43 Normal,
44 Executable,
45 Symlink,
46 Gitlink,
47}
48
49impl FileMode {
50 pub fn to_byte(&self) -> u8 {
51 match self {
52 FileMode::Normal => 0,
53 FileMode::Executable => 1,
54 FileMode::Symlink => 2,
55 FileMode::Gitlink => 3,
56 }
57 }
58
59 pub fn from_byte(b: u8) -> Option<Self> {
60 match b {
61 0 => Some(FileMode::Normal),
62 1 => Some(FileMode::Executable),
63 2 => Some(FileMode::Symlink),
64 3 => Some(FileMode::Gitlink),
65 _ => None,
66 }
67 }
68
69 pub fn to_unix_mode(&self) -> u32 {
70 match self {
71 FileMode::Normal => 0o100644,
72 FileMode::Executable => 0o100755,
73 FileMode::Symlink => 0o120000,
74 FileMode::Gitlink => 0o160000,
75 }
76 }
77}
78
79#[repr(u8)]
82#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
83pub enum EntryType {
84 Blob,
85 Tree,
86 Symlink,
87 Gitlink,
88}
89
90impl EntryType {
91 pub fn to_byte(&self) -> u8 {
92 match self {
93 EntryType::Blob => 0,
94 EntryType::Tree => 1,
95 EntryType::Symlink => 2,
96 EntryType::Gitlink => 3,
97 }
98 }
99
100 pub fn from_byte(b: u8) -> Option<Self> {
101 match b {
102 0 => Some(EntryType::Blob),
103 1 => Some(EntryType::Tree),
104 2 => Some(EntryType::Symlink),
105 3 => Some(EntryType::Gitlink),
106 _ => None,
107 }
108 }
109}
110
111#[derive(Clone, Debug, PartialEq, Eq)]
114pub enum TreeEntryTarget {
115 Blob { hash: ContentHash, executable: bool },
116 Tree { hash: ContentHash },
117 Symlink { hash: ContentHash },
118 Gitlink { target: GitObjectId },
119}
120
121impl TreeEntryTarget {
122 pub fn entry_type(&self) -> EntryType {
123 match self {
124 TreeEntryTarget::Blob { .. } => EntryType::Blob,
125 TreeEntryTarget::Tree { .. } => EntryType::Tree,
126 TreeEntryTarget::Symlink { .. } => EntryType::Symlink,
127 TreeEntryTarget::Gitlink { .. } => EntryType::Gitlink,
128 }
129 }
130
131 pub fn mode(&self) -> FileMode {
132 match self {
133 TreeEntryTarget::Blob {
134 executable: true, ..
135 } => FileMode::Executable,
136 TreeEntryTarget::Blob { .. } => FileMode::Normal,
137 TreeEntryTarget::Tree { .. } => FileMode::Normal,
138 TreeEntryTarget::Symlink { .. } => FileMode::Symlink,
139 TreeEntryTarget::Gitlink { .. } => FileMode::Gitlink,
140 }
141 }
142
143 pub fn content_hash(&self) -> Option<ContentHash> {
144 match self {
145 TreeEntryTarget::Blob { hash, .. }
146 | TreeEntryTarget::Tree { hash }
147 | TreeEntryTarget::Symlink { hash } => Some(*hash),
148 TreeEntryTarget::Gitlink { .. } => None,
149 }
150 }
151
152 pub fn gitlink_target(&self) -> Option<GitObjectId> {
153 match self {
154 TreeEntryTarget::Gitlink { target } => Some(*target),
155 _ => None,
156 }
157 }
158
159 fn encoded_payload_len(&self) -> usize {
160 match self {
161 TreeEntryTarget::Blob { hash, .. }
162 | TreeEntryTarget::Tree { hash }
163 | TreeEntryTarget::Symlink { hash } => hash.as_bytes().len(),
164 TreeEntryTarget::Gitlink { target } => target.as_bytes().len(),
165 }
166 }
167
168 fn update_hasher(&self, hasher: &mut blake3::Hasher) {
169 hasher.update(&[self.mode().to_byte()]);
170 hasher.update(&[self.entry_type().to_byte()]);
171 match self {
172 TreeEntryTarget::Blob { hash, .. }
173 | TreeEntryTarget::Tree { hash }
174 | TreeEntryTarget::Symlink { hash } => hasher.update(hash.as_bytes()),
175 TreeEntryTarget::Gitlink { target } => {
176 hasher.update(&[git_format_to_tag(target.format())]);
177 hasher.update(target.as_bytes())
178 }
179 };
180 }
181}
182
183pub fn validate_name(name: &str) -> Result<(), TreeError> {
186 if name.is_empty() {
187 return Err(TreeError::InvalidName("entry name cannot be empty".into()));
188 }
189 if name == "." || name == ".." {
190 return Err(TreeError::InvalidName(format!(
191 "'{}' is not a valid entry name",
192 name
193 )));
194 }
195 if name.contains('/') || name.contains('\\') {
196 return Err(TreeError::InvalidName(
197 "entry name cannot contain path separators".into(),
198 ));
199 }
200 if name.bytes().any(|b| b < 0x20 || b == 0x7f) {
201 return Err(TreeError::InvalidName(
202 "entry name contains control characters".into(),
203 ));
204 }
205 Ok(())
206}
207
208#[derive(Clone, Debug, PartialEq, Eq)]
209pub struct TreeEntry {
210 name: String,
211 target: TreeEntryTarget,
212}
213
214impl TreeEntry {
215 #[cfg(test)]
216 pub(crate) fn new_unchecked_for_tests(
217 name: impl Into<String>,
218 target: TreeEntryTarget,
219 ) -> Self {
220 Self {
221 name: name.into(),
222 target,
223 }
224 }
225
226 pub(crate) fn validate(&self) -> Result<(), TreeError> {
227 validate_name(&self.name)
228 }
229
230 pub fn file(
231 name: impl Into<String>,
232 hash: ContentHash,
233 executable: bool,
234 ) -> Result<Self, TreeError> {
235 let name = name.into();
236 validate_name(&name)?;
237 Ok(Self {
238 name,
239 target: TreeEntryTarget::Blob { hash, executable },
240 })
241 }
242
243 pub fn directory(name: impl Into<String>, hash: ContentHash) -> Result<Self, TreeError> {
244 let name = name.into();
245 validate_name(&name)?;
246 Ok(Self {
247 name,
248 target: TreeEntryTarget::Tree { hash },
249 })
250 }
251
252 pub fn symlink(name: impl Into<String>, hash: ContentHash) -> Result<Self, TreeError> {
253 let name = name.into();
254 validate_name(&name)?;
255 Ok(Self {
256 name,
257 target: TreeEntryTarget::Symlink { hash },
258 })
259 }
260
261 pub fn gitlink(name: impl Into<String>, target: GitObjectId) -> Result<Self, TreeError> {
262 let name = name.into();
263 validate_name(&name)?;
264 Ok(Self {
265 name,
266 target: TreeEntryTarget::Gitlink { target },
267 })
268 }
269
270 pub fn name(&self) -> &str {
271 &self.name
272 }
273
274 pub fn set_name(&mut self, name: impl Into<String>) -> Result<(), TreeError> {
275 let name = name.into();
276 validate_name(&name)?;
277 self.name = name;
278 Ok(())
279 }
280
281 pub fn with_mode(&self, mode: FileMode) -> Result<Self, TreeError> {
282 match (&self.target, mode) {
283 (TreeEntryTarget::Blob { hash, .. }, FileMode::Normal | FileMode::Executable) => {
284 Self::file(self.name.clone(), *hash, mode == FileMode::Executable)
285 }
286 (TreeEntryTarget::Symlink { .. }, FileMode::Symlink)
287 | (TreeEntryTarget::Tree { .. }, _)
288 | (TreeEntryTarget::Gitlink { .. }, FileMode::Gitlink)
289 if mode == self.mode() =>
290 {
291 Ok(self.clone())
292 }
293 _ => Err(TreeError::InvalidStructure(format!(
294 "cannot apply mode {:?} to {:?} entry '{}'",
295 mode,
296 self.entry_type(),
297 self.name
298 ))),
299 }
300 }
301
302 pub fn target(&self) -> &TreeEntryTarget {
303 &self.target
304 }
305
306 pub fn entry_type(&self) -> EntryType {
307 self.target.entry_type()
308 }
309
310 pub fn mode(&self) -> FileMode {
311 self.target.mode()
312 }
313
314 pub fn content_hash(&self) -> Option<ContentHash> {
315 self.target.content_hash()
316 }
317
318 pub fn leaf_content_hash(&self) -> Option<ContentHash> {
319 match self.target {
320 TreeEntryTarget::Blob { hash, .. } | TreeEntryTarget::Symlink { hash } => Some(hash),
321 TreeEntryTarget::Tree { .. } | TreeEntryTarget::Gitlink { .. } => None,
322 }
323 }
324
325 pub fn require_content_hash(&self) -> ContentHash {
326 self.content_hash()
327 .expect("tree entry target does not carry a Heddle content hash")
328 }
329
330 pub fn blob_hash(&self) -> Option<ContentHash> {
331 match self.target {
332 TreeEntryTarget::Blob { hash, .. } => Some(hash),
333 _ => None,
334 }
335 }
336
337 pub fn tree_hash(&self) -> Option<ContentHash> {
338 match self.target {
339 TreeEntryTarget::Tree { hash } => Some(hash),
340 _ => None,
341 }
342 }
343
344 pub fn symlink_hash(&self) -> Option<ContentHash> {
345 match self.target {
346 TreeEntryTarget::Symlink { hash } => Some(hash),
347 _ => None,
348 }
349 }
350
351 pub fn gitlink_target(&self) -> Option<GitObjectId> {
352 self.target.gitlink_target()
353 }
354
355 pub fn is_tree(&self) -> bool {
356 self.entry_type() == EntryType::Tree
357 }
358
359 pub fn is_blob(&self) -> bool {
360 self.entry_type() == EntryType::Blob
361 }
362
363 pub fn is_symlink(&self) -> bool {
364 self.entry_type() == EntryType::Symlink
365 }
366
367 pub fn is_gitlink(&self) -> bool {
368 self.entry_type() == EntryType::Gitlink
369 }
370
371 pub fn is_executable(&self) -> bool {
372 self.mode() == FileMode::Executable
373 }
374
375 pub(crate) fn encoded_len(&self) -> usize {
376 1 + 1 + self.target.encoded_payload_len() + self.name.len() + 1
377 }
378
379 pub(crate) fn update_hasher(&self, hasher: &mut blake3::Hasher) {
380 self.target.update_hasher(hasher);
381 hasher.update(self.name.as_bytes());
382 hasher.update(&[0]);
383 }
384}
385
386#[derive(Clone, Debug, PartialEq, Eq)]
389pub struct Tree {
390 entries: Vec<TreeEntry>,
391}
392
393impl Tree {
394 pub fn new() -> Self {
395 Self {
396 entries: Vec::new(),
397 }
398 }
399
400 pub fn from_entries(mut entries: Vec<TreeEntry>) -> Self {
401 entries.sort_by(|a, b| a.name.cmp(&b.name));
402 Self { entries }
403 }
404
405 #[cfg(test)]
406 pub(crate) fn from_entries_unchecked_for_tests(entries: Vec<TreeEntry>) -> Self {
407 Self { entries }
408 }
409
410 pub fn validate(&self) -> Result<(), TreeError> {
411 let mut previous_name: Option<&str> = None;
412 for entry in &self.entries {
413 entry.validate()?;
414 if let Some(previous) = previous_name
415 && previous >= entry.name.as_str()
416 {
417 return Err(TreeError::InvalidStructure(
418 "entries must be strictly sorted by name".to_string(),
419 ));
420 }
421 previous_name = Some(&entry.name);
422 }
423 Ok(())
424 }
425
426 pub fn entries(&self) -> &[TreeEntry] {
427 &self.entries
428 }
429
430 pub fn get(&self, name: &str) -> Option<&TreeEntry> {
431 let index = self
432 .entries
433 .binary_search_by(|entry| entry.name.as_str().cmp(name))
434 .ok()?;
435 self.entries.get(index)
436 }
437
438 pub fn insert(&mut self, entry: TreeEntry) {
439 self.entries.retain(|e| e.name != entry.name);
440 let pos = self
441 .entries
442 .iter()
443 .position(|e| e.name > entry.name)
444 .unwrap_or(self.entries.len());
445 self.entries.insert(pos, entry);
446 }
447
448 pub fn remove(&mut self, name: &str) -> Option<TreeEntry> {
449 let pos = self.entries.iter().position(|e| e.name == name)?;
450 Some(self.entries.remove(pos))
451 }
452
453 pub fn is_empty(&self) -> bool {
454 self.entries.is_empty()
455 }
456
457 pub fn len(&self) -> usize {
458 self.entries.len()
459 }
460
461 pub fn hash(&self) -> ContentHash {
462 let total_len: usize = self.entries.iter().map(TreeEntry::encoded_len).sum();
463 ContentHash::compute_typed_with_len("tree", total_len as u64, |hasher| {
464 for entry in &self.entries {
465 entry.update_hasher(hasher);
466 }
467 })
468 }
469
470 pub fn iter(&self) -> impl Iterator<Item = &TreeEntry> {
471 self.entries.iter()
472 }
473
474 pub fn get_path(&self, path: &Path) -> Option<&TreeEntry> {
475 let name = path.file_name()?.to_str()?;
476 if path.parent().is_none_or(|p| p.as_os_str().is_empty()) {
477 self.get(name)
478 } else {
479 None
480 }
481 }
482}
483
484#[derive(Serialize, Deserialize)]
487struct EncodedTreeV2 {
488 version: u8,
489 entries: Vec<EncodedTreeEntryV2>,
490}
491
492#[derive(Serialize, Deserialize)]
493struct EncodedTreeEntryV2 {
494 name: String,
495 kind: u8,
496 hash: Option<ContentHash>,
497 executable: Option<bool>,
498 git_format: Option<u8>,
499 git_oid: Option<Vec<u8>>,
500}
501
502impl Serialize for Tree {
503 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
504 where
505 S: Serializer,
506 {
507 EncodedTreeV2::from(self).serialize(serializer)
508 }
509}
510
511impl<'de> Deserialize<'de> for Tree {
512 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
513 where
514 D: Deserializer<'de>,
515 {
516 let encoded = EncodedTreeV2::deserialize(deserializer)?;
517 Tree::try_from(encoded).map_err(de::Error::custom)
518 }
519}
520
521#[derive(Debug)]
522pub(crate) enum TreeDecodeError {
523 Decode(rmp_serde::decode::Error),
524 Invalid(TreeError),
525}
526
527impl From<rmp_serde::decode::Error> for TreeDecodeError {
528 fn from(error: rmp_serde::decode::Error) -> Self {
529 Self::Decode(error)
530 }
531}
532
533impl From<TreeError> for TreeDecodeError {
534 fn from(error: TreeError) -> Self {
535 Self::Invalid(error)
536 }
537}
538
539impl From<&Tree> for EncodedTreeV2 {
540 fn from(tree: &Tree) -> Self {
541 Self {
542 version: TREE_FORMAT_VERSION,
543 entries: tree.entries.iter().map(EncodedTreeEntryV2::from).collect(),
544 }
545 }
546}
547
548impl From<&TreeEntry> for EncodedTreeEntryV2 {
549 fn from(entry: &TreeEntry) -> Self {
550 match entry.target() {
551 TreeEntryTarget::Blob { hash, executable } => Self {
552 name: entry.name.clone(),
553 kind: ENTRY_KIND_BLOB,
554 hash: Some(*hash),
555 executable: Some(*executable),
556 git_format: None,
557 git_oid: None,
558 },
559 TreeEntryTarget::Tree { hash } => Self {
560 name: entry.name.clone(),
561 kind: ENTRY_KIND_TREE,
562 hash: Some(*hash),
563 executable: None,
564 git_format: None,
565 git_oid: None,
566 },
567 TreeEntryTarget::Symlink { hash } => Self {
568 name: entry.name.clone(),
569 kind: ENTRY_KIND_SYMLINK,
570 hash: Some(*hash),
571 executable: None,
572 git_format: None,
573 git_oid: None,
574 },
575 TreeEntryTarget::Gitlink { target } => Self {
576 name: entry.name.clone(),
577 kind: ENTRY_KIND_GITLINK,
578 hash: None,
579 executable: None,
580 git_format: Some(git_format_to_tag(target.format())),
581 git_oid: Some(target.as_bytes().to_vec()),
582 },
583 }
584 }
585}
586
587impl TryFrom<EncodedTreeV2> for Tree {
588 type Error = TreeError;
589
590 fn try_from(encoded: EncodedTreeV2) -> Result<Self, Self::Error> {
591 if encoded.version != TREE_FORMAT_VERSION {
592 return Err(TreeError::InvalidStructure(format!(
593 "unsupported tree format version {}; this binary writes {}",
594 encoded.version, TREE_FORMAT_VERSION
595 )));
596 }
597 let mut entries = Vec::with_capacity(encoded.entries.len());
598 for entry in encoded.entries {
599 entries.push(TreeEntry::try_from(entry)?);
600 }
601 let tree = Tree::from_entries(entries);
602 tree.validate()?;
603 Ok(tree)
604 }
605}
606
607impl Tree {
608 pub(crate) fn decode_current_msgpack(data: &[u8]) -> Result<Self, TreeDecodeError> {
609 let encoded: EncodedTreeV2 = rmp_serde::from_slice(data)?;
610 Ok(Tree::try_from(encoded)?)
611 }
612}
613
614impl TryFrom<EncodedTreeEntryV2> for TreeEntry {
615 type Error = TreeError;
616
617 fn try_from(encoded: EncodedTreeEntryV2) -> Result<Self, Self::Error> {
618 match encoded.kind {
619 ENTRY_KIND_BLOB => TreeEntry::file(
620 encoded.name,
621 required_hash(encoded.hash, ENTRY_KIND_BLOB)?,
622 encoded.executable.unwrap_or(false),
623 ),
624 ENTRY_KIND_TREE => {
625 TreeEntry::directory(encoded.name, required_hash(encoded.hash, ENTRY_KIND_TREE)?)
626 }
627 ENTRY_KIND_SYMLINK => TreeEntry::symlink(
628 encoded.name,
629 required_hash(encoded.hash, ENTRY_KIND_SYMLINK)?,
630 ),
631 ENTRY_KIND_GITLINK => {
632 let format = git_format_from_tag(required_git_format(
633 encoded.git_format,
634 ENTRY_KIND_GITLINK,
635 )?)?;
636 let oid = encoded.git_oid.ok_or_else(|| {
637 TreeError::InvalidStructure("gitlink entry is missing git_oid".into())
638 })?;
639 let target = GitObjectId::from_raw(format, &oid).map_err(|err| {
640 TreeError::InvalidStructure(format!("invalid gitlink target: {err}"))
641 })?;
642 TreeEntry::gitlink(encoded.name, target)
643 }
644 other => Err(TreeError::InvalidStructure(format!(
645 "unknown tree entry kind {other}"
646 ))),
647 }
648 }
649}
650
651fn required_hash(hash: Option<ContentHash>, kind: u8) -> Result<ContentHash, TreeError> {
652 hash.ok_or_else(|| TreeError::InvalidStructure(format!("entry kind {kind} is missing hash")))
653}
654
655fn required_git_format(format: Option<u8>, kind: u8) -> Result<u8, TreeError> {
656 format.ok_or_else(|| {
657 TreeError::InvalidStructure(format!("entry kind {kind} is missing git_format"))
658 })
659}
660
661fn git_format_to_tag(format: GitObjectFormat) -> u8 {
662 match format {
663 GitObjectFormat::Sha1 => GIT_OBJECT_FORMAT_SHA1,
664 GitObjectFormat::Sha256 => GIT_OBJECT_FORMAT_SHA256,
665 }
666}
667
668fn git_format_from_tag(tag: u8) -> Result<GitObjectFormat, TreeError> {
669 match tag {
670 GIT_OBJECT_FORMAT_SHA1 => Ok(GitObjectFormat::Sha1),
671 GIT_OBJECT_FORMAT_SHA256 => Ok(GitObjectFormat::Sha256),
672 other => Err(TreeError::InvalidStructure(format!(
673 "unknown git object format tag {other}"
674 ))),
675 }
676}
677
678impl Default for Tree {
679 fn default() -> Self {
680 Self::new()
681 }
682}
683
684impl IntoIterator for Tree {
685 type Item = TreeEntry;
686 type IntoIter = std::vec::IntoIter<TreeEntry>;
687
688 fn into_iter(self) -> Self::IntoIter {
689 self.entries.into_iter()
690 }
691}
692
693impl<'a> IntoIterator for &'a Tree {
694 type Item = &'a TreeEntry;
695 type IntoIter = std::slice::Iter<'a, TreeEntry>;
696
697 fn into_iter(self) -> Self::IntoIter {
698 self.entries.iter()
699 }
700}