use sha3::{Digest, Sha3_256};
use std::collections::{HashMap, VecDeque};
use std::fmt;
use std::fs;
use std::path::PathBuf;
use walkdir::{DirEntry, WalkDir};
/// Kinds of entries that get mixed into a hash, each with its own one-byte tag.
///
/// The discriminant of every variant is the tag byte returned by [`NodeType::to_u8`].
#[derive(Debug, Clone, Copy)]
#[repr(u8)]
enum NodeType {
    /// A directory node.
    Directory = 2,
    /// Byte placed between adjacent digests when several are joined together.
    DirSeparator = 3,
    /// A regular file node.
    File = 5,
    /// A symbolic link node.
    Symlink = 7,
}

impl NodeType {
    /// Returns the tag byte for this node type.
    pub fn to_u8(self) -> u8 {
        // `#[repr(u8)]` makes this cast yield exactly the discriminant declared above.
        self as u8
    }
}
/// Outcome of inspecting one node's content, as produced by `hash_content`.
#[derive(Debug, Clone)]
enum ContentResult {
/// SHA3-256 digest bytes: of the file's content for a regular file, or of the
/// (lossily UTF-8 converted) link target for a symlink.
File(Vec<u8>),
/// The node is a directory; no digest is carried here — `hash_directory`
/// builds the directory's hash from its children instead.
Directory,
}
/// Reasons the content of a single file-system node could not be obtained.
#[derive(Debug)]
pub enum ContentError {
    /// The node is neither a directory, a regular file, nor a symlink.
    UnknownNodeType,
    /// An I/O operation on the node failed; the underlying error is kept as the source.
    IOError(std::io::Error),
}

impl fmt::Display for ContentError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ContentError::UnknownNodeType => write!(f, "Node is not a directory, file, or symlink"),
            ContentError::IOError(_) => write!(f, "Encountered a problem opening a node"),
        }
    }
}

impl std::error::Error for ContentError {
    /// Exposes the wrapped I/O error so callers can inspect the root cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            ContentError::UnknownNodeType => None,
            ContentError::IOError(err) => Some(err),
        }
    }
}

impl From<std::io::Error> for ContentError {
    fn from(err: std::io::Error) -> Self {
        Self::IOError(err)
    }
}

/// Error returned when a hash could not be computed.
#[derive(Debug)]
pub enum HashError {
    /// Reading the content of some node failed.
    ContentError(ContentError),
}

impl fmt::Display for HashError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The message is intentionally generic; the detail is available through `source()`.
        write!(f, "Hash could not be computed")
    }
}

impl std::error::Error for HashError {
    /// Exposes the underlying [`ContentError`] as the cause of this error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            HashError::ContentError(err) => Some(err),
        }
    }
}

impl From<ContentError> for HashError {
    fn from(err: ContentError) -> Self {
        Self::ContentError(err)
    }
}
pub fn hash_directories(paths: Vec<PathBuf>) -> Result<Vec<u8>, HashError> {
let mut paths = paths.clone();
paths.sort();
let data = paths
.iter()
.cloned()
.map(|p| hash_directory(p))
.collect::<Result<Vec<_>, HashError>>()?
.join(&NodeType::DirSeparator.to_u8());
match paths.len() {
1 => Ok(data),
_ => Ok(Vec::from(
Sha3_256::new().chain_update(data).finalize().as_slice(),
)),
}
}
pub fn hash_directory(path: PathBuf) -> Result<Vec<u8>, HashError> {
let mut cache_map: HashMap<usize, VecDeque<Vec<u8>>> = HashMap::new();
for entry in WalkDir::new(&path).sort_by_file_name().contents_first(true) {
let entry = match entry {
Ok(v) => v,
Err(_) => continue,
};
let content = hash_content(&entry)?;
let name = Vec::from(
Sha3_256::new()
.chain_update(entry.file_name().to_string_lossy().as_bytes())
.finalize()
.as_slice(),
);
let node = match content {
crate::ContentResult::File(content) => node_file_hash(&entry, name, content),
crate::ContentResult::Directory => node_dir_hash(&mut cache_map, &entry, name),
};
match cache_map.get_mut(&entry.depth()) {
Some(q) => q.push_back(node),
None => {
let mut q: VecDeque<Vec<u8>> = VecDeque::new();
q.push_back(node);
cache_map.insert(entry.depth(), q);
}
}
}
Ok(cache_map.get_mut(&0).unwrap().pop_front().unwrap())
}
/// Produces the content digest for a single walked entry.
///
/// * symlink — SHA3-256 of the link target path (lossily converted to UTF-8);
/// * directory — `ContentResult::Directory`, no digest is computed here;
/// * regular file — SHA3-256 of the complete file content.
///
/// # Errors
///
/// `ContentError::IOError` when reading the link target or the file fails, and
/// `ContentError::UnknownNodeType` for any other kind of node.
fn hash_content(entry: &DirEntry) -> Result<ContentResult, ContentError> {
    // Small local helper: SHA3-256 of `bytes` as an owned vector.
    let digest_of = |bytes: &[u8]| -> Vec<u8> {
        let mut hasher = Sha3_256::new();
        hasher.update(bytes);
        hasher.finalize().to_vec()
    };

    let kind = entry.file_type();
    // The symlink check comes first, exactly as before.
    if kind.is_symlink() {
        let target = fs::read_link(entry.path())?;
        return Ok(ContentResult::File(digest_of(
            target.to_string_lossy().as_bytes(),
        )));
    }
    if kind.is_dir() {
        return Ok(ContentResult::Directory);
    }
    if !kind.is_file() {
        return Err(ContentError::UnknownNodeType);
    }
    let bytes = fs::read(entry.path())?;
    Ok(ContentResult::File(digest_of(&bytes)))
}
/// Combines a leaf node's name digest, type tag and content digest into its node hash.
///
/// The tag byte is `NodeType::Symlink` when `entry` is a symlink and `NodeType::File`
/// otherwise. The three parts are fed to SHA3-256 in the order name, tag, content.
fn node_file_hash(entry: &DirEntry, name: Vec<u8>, content: Vec<u8>) -> Vec<u8> {
    let tag = if entry.file_type().is_symlink() {
        NodeType::Symlink
    } else {
        NodeType::File
    };

    let mut hasher = Sha3_256::new();
    hasher.update(&name);
    hasher.update([tag.to_u8()]);
    hasher.update(&content);
    hasher.finalize().to_vec()
}
/// Builds the node hash of a directory from its name digest and its children's hashes.
///
/// All hashes queued in `cache` one level below `entry` are drained (leaving that queue
/// empty) and joined with the `NodeType::DirSeparator` byte; a missing queue is treated as
/// no children. SHA3-256 is then computed over name, the `NodeType::Directory` tag and the
/// joined child hashes, in that order.
fn node_dir_hash(
    cache: &mut HashMap<usize, VecDeque<Vec<u8>>>,
    entry: &DirEntry,
    name: Vec<u8>,
) -> Vec<u8> {
    let separator = NodeType::DirSeparator.to_u8();
    let child_depth = entry.depth() + 1;
    let children = cache
        .get_mut(&child_depth)
        .map(|queue| queue.drain(..).collect::<Vec<Vec<u8>>>().join(&separator))
        .unwrap_or_default();

    let mut hasher = Sha3_256::new();
    hasher.update(&name);
    hasher.update([NodeType::Directory.to_u8()]);
    hasher.update(&children);
    hasher.finalize().to_vec()
}
#[cfg(test)]
mod tests {
    use crate::{Digest, Sha3_256};
    use hex_literal::hex;
    use std::path::PathBuf;

    /// Asserts that `actual` equals the 32-byte digest `expected`.
    fn assert_digest(actual: &[u8], expected: [u8; 32]) {
        assert_eq!(actual, &expected[..]);
    }

    /// Sanity check of the SHA3-256 implementation against a fixed digest.
    #[test]
    fn basic_sha256() {
        let digest = Sha3_256::new().chain_update(b"coffee").finalize();
        assert_digest(
            &digest[..],
            hex!("2250fa0b557f93b1d92a26e2ca55cfe2354e416e9d674277784cfb09184b41b0"),
        );
    }

    #[test]
    fn hash_test_data_one() {
        let digest = crate::hash_directory(PathBuf::from("test_data/one")).unwrap();
        assert_digest(
            &digest,
            hex!("9fd3dceb108e5f6067a623a592524a4014f5d7244e537891d147b51e8c1c147d"),
        );
    }

    #[test]
    fn hash_test_data_two() {
        let digest = crate::hash_directory(PathBuf::from("test_data/two")).unwrap();
        assert_digest(
            &digest,
            hex!("9119ffd015d217097164f944331ee865fb6ac8c0b670728cf42c9e45c21ea0df"),
        );
    }

    #[test]
    fn hash_test_data_all() {
        let digest = crate::hash_directory(PathBuf::from("test_data")).unwrap();
        assert_digest(
            &digest,
            hex!("3b5e49ac9126759771d677bdacbc18a63ff94ad4e07718c18347254d7b9c6cb1"),
        );
    }

    /// The input order is deliberately reversed; `hash_directories` sorts its paths.
    #[test]
    fn hash_test_data_all_via_directories() {
        let roots = vec![
            PathBuf::from("test_data/two"),
            PathBuf::from("test_data/one"),
        ];
        let digest = crate::hash_directories(roots).unwrap();
        assert_digest(
            &digest,
            hex!("9fe82ef81c21f04ed6050b650fdef5c441fa49db43b2aaf7e383f7466778b026"),
        );
    }

    /// A single path yields the same digest as `hash_directory` on that path.
    #[test]
    fn hash_test_data_just_one_via_directories() {
        let roots = vec![PathBuf::from("test_data/one")];
        let digest = crate::hash_directories(roots).unwrap();
        assert_digest(
            &digest,
            hex!("9fd3dceb108e5f6067a623a592524a4014f5d7244e537891d147b51e8c1c147d"),
        );
    }

    /// No paths at all hashes empty input.
    #[test]
    fn hash_directories_empty_input() {
        let digest = crate::hash_directories(Vec::new()).unwrap();
        assert_digest(
            &digest,
            hex!("a7ffc6f8bf1ed76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a"),
        );
    }
}