use crate::error::{Error, Result};
use crate::hash::Hash;
use std::io::Read;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntryType {
Blob = 1,
Tree = 2,
}
impl EntryType {
pub fn to_u8(self) -> u8 {
self as u8
}
pub fn from_u8(value: u8) -> Result<Self> {
match value {
1 => Ok(EntryType::Blob),
2 => Ok(EntryType::Tree),
_ => Err(Error::invalid_tree_entry(format!(
"Invalid entry type: {}",
value
))),
}
}
}
pub type FileMode = u32;
pub mod file_modes {
use super::FileMode;
pub const REGULAR: FileMode = 0o100644;
pub const EXECUTABLE: FileMode = 0o100755;
pub const DIRECTORY: FileMode = 0o040755;
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeEntry {
pub entry_type: EntryType,
pub mode: FileMode,
pub hash: Hash,
pub name: String,
}
impl TreeEntry {
pub fn new(entry_type: EntryType, mode: FileMode, hash: Hash, name: String) -> Result<Self> {
if name.is_empty() {
return Err(Error::invalid_tree_entry("Name cannot be empty"));
}
if name.len() > 255 {
return Err(Error::invalid_tree_entry(format!(
"Name too long: {} bytes (max 255)",
name.len()
)));
}
if name.contains('\0') {
return Err(Error::invalid_tree_entry("Name cannot contain null bytes"));
}
Ok(Self {
entry_type,
mode,
hash,
name,
})
}
pub fn encode(&self) -> Vec<u8> {
let mut buf = Vec::new();
buf.push(self.entry_type.to_u8());
buf.extend_from_slice(&self.mode.to_le_bytes());
buf.extend_from_slice(self.hash.as_bytes());
buf.push(self.name.len() as u8);
buf.extend_from_slice(self.name.as_bytes());
buf
}
pub fn decode<R: Read>(reader: &mut R) -> Result<Self> {
let mut type_buf = [0u8; 1];
reader.read_exact(&mut type_buf)?;
let entry_type = EntryType::from_u8(type_buf[0])?;
let mut mode_buf = [0u8; 4];
reader.read_exact(&mut mode_buf)?;
let mode = u32::from_le_bytes(mode_buf);
let mut hash_buf = [0u8; 32];
reader.read_exact(&mut hash_buf)?;
let hash = Hash::from_bytes(hash_buf);
let mut name_len_buf = [0u8; 1];
reader.read_exact(&mut name_len_buf)?;
let name_len = name_len_buf[0] as usize;
if name_len == 0 {
return Err(Error::invalid_tree_entry("Name length is zero"));
}
let mut name_buf = vec![0u8; name_len];
reader.read_exact(&mut name_buf)?;
let name = String::from_utf8(name_buf)
.map_err(|e| Error::invalid_tree_entry(format!("Invalid UTF-8 in name: {}", e)))?;
Self::new(entry_type, mode, hash, name)
}
}
impl PartialOrd for TreeEntry {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for TreeEntry {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.name.as_bytes().cmp(other.name.as_bytes())
}
}
pub fn encode_tree(mut entries: Vec<TreeEntry>) -> Vec<u8> {
entries.sort();
let mut buf = Vec::new();
for entry in entries {
buf.extend_from_slice(&entry.encode());
}
buf
}
pub fn decode_tree(data: &[u8]) -> Result<Vec<TreeEntry>> {
let mut reader = std::io::Cursor::new(data);
let mut entries = Vec::new();
while reader.position() < data.len() as u64 {
let entry = TreeEntry::decode(&mut reader)?;
entries.push(entry);
}
Ok(entries)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_entry_encode_decode() {
let hash = Hash::hash_bytes(b"test");
let entry = TreeEntry::new(
EntryType::Blob,
file_modes::REGULAR,
hash,
"test.txt".to_string(),
)
.unwrap();
let encoded = entry.encode();
let mut reader = std::io::Cursor::new(&encoded);
let decoded = TreeEntry::decode(&mut reader).unwrap();
assert_eq!(decoded, entry);
}
#[test]
fn test_entry_name_validation() {
let hash = Hash::hash_bytes(b"test");
assert!(
TreeEntry::new(EntryType::Blob, file_modes::REGULAR, hash, "".to_string()).is_err()
);
let long_name = "a".repeat(256);
assert!(TreeEntry::new(EntryType::Blob, file_modes::REGULAR, hash, long_name).is_err());
assert!(
TreeEntry::new(
EntryType::Blob,
file_modes::REGULAR,
hash,
"test\0file".to_string()
)
.is_err()
);
}
#[test]
fn test_tree_ordering() {
let hash = Hash::hash_bytes(b"test");
let entry1 = TreeEntry::new(
EntryType::Blob,
file_modes::REGULAR,
hash,
"a.txt".to_string(),
)
.unwrap();
let entry2 = TreeEntry::new(
EntryType::Blob,
file_modes::REGULAR,
hash,
"b.txt".to_string(),
)
.unwrap();
let entry3 = TreeEntry::new(
EntryType::Blob,
file_modes::REGULAR,
hash,
"c.txt".to_string(),
)
.unwrap();
assert!(entry1 < entry2);
assert!(entry2 < entry3);
}
#[test]
fn test_encode_decode_tree() {
let hash1 = Hash::hash_bytes(b"test1");
let hash2 = Hash::hash_bytes(b"test2");
let entries = vec![
TreeEntry::new(
EntryType::Blob,
file_modes::REGULAR,
hash1,
"b.txt".to_string(),
)
.unwrap(),
TreeEntry::new(
EntryType::Blob,
file_modes::REGULAR,
hash2,
"a.txt".to_string(),
)
.unwrap(),
];
let encoded = encode_tree(entries.clone());
let decoded = decode_tree(&encoded).unwrap();
assert_eq!(decoded.len(), 2);
assert_eq!(decoded[0].name, "a.txt");
assert_eq!(decoded[1].name, "b.txt");
}
#[test]
fn test_empty_tree() {
let entries: Vec<TreeEntry> = vec![];
let encoded = encode_tree(entries);
assert_eq!(encoded.len(), 0);
let decoded = decode_tree(&encoded).unwrap();
assert_eq!(decoded.len(), 0);
}
use proptest::prelude::*;
fn arb_entry_name() -> impl Strategy<Value = String> {
"[a-zA-Z0-9._-]{1,255}"
.prop_filter("no null bytes", |s| !s.contains('\0'))
}
fn arb_tree_entry() -> impl Strategy<Value = TreeEntry> {
(
prop::sample::select(vec![EntryType::Blob, EntryType::Tree]),
any::<u32>(),
prop::array::uniform32(any::<u8>()),
arb_entry_name(),
)
.prop_map(|(entry_type, mode, hash_bytes, name)| {
TreeEntry::new(entry_type, mode, Hash::from_bytes(hash_bytes), name).unwrap()
})
}
proptest! {
#![proptest_config(ProptestConfig {
cases: 256,
max_shrink_iters: 10000,
..ProptestConfig::default()
})]
#[test]
fn prop_tree_entry_roundtrip(entry in arb_tree_entry()) {
let encoded = entry.encode();
let mut reader = std::io::Cursor::new(&encoded);
let decoded = TreeEntry::decode(&mut reader)?;
prop_assert_eq!(entry, decoded);
}
#[test]
fn prop_tree_canonicalization_order_independent(
entries in prop::collection::vec(arb_tree_entry(), 1..20)
) {
let mut sorted1 = entries.clone();
sorted1.sort();
let encoded1 = encode_tree(sorted1);
let hash1 = Hash::hash_bytes(&encoded1);
let mut reversed = entries;
reversed.reverse();
reversed.sort();
let encoded2 = encode_tree(reversed);
let hash2 = Hash::hash_bytes(&encoded2);
prop_assert_eq!(
hash1,
hash2,
"Tree hash must be independent of input ordering"
);
}
#[test]
fn prop_empty_name_rejected(
entry_type in prop::sample::select(vec![EntryType::Blob, EntryType::Tree]),
mode in any::<u32>(),
hash_bytes in prop::array::uniform32(any::<u8>()),
) {
let result = TreeEntry::new(
entry_type,
mode,
Hash::from_bytes(hash_bytes),
String::new(),
);
prop_assert!(result.is_err());
}
#[test]
fn prop_null_byte_rejected(
prefix in "[a-zA-Z0-9]{0,10}",
suffix in "[a-zA-Z0-9]{0,10}",
) {
let name = format!("{}\0{}", prefix, suffix);
let result = TreeEntry::new(
EntryType::Blob,
0o644,
Hash::hash_bytes(b"test"),
name,
);
prop_assert!(result.is_err());
}
#[test]
fn prop_long_name_rejected(name in "[a-zA-Z]{256,300}") {
let result = TreeEntry::new(
EntryType::Blob,
0o644,
Hash::hash_bytes(b"test"),
name,
);
prop_assert!(result.is_err());
}
}
}