use std::path::{Component, Path, PathBuf};
use bee::file::CollectionEntry;
/// Upper bound, in bytes, on the combined size of all files in one collection (256 MiB).
pub const MAX_COLLECTION_BYTES: u64 = 256 << 20;
/// Upper bound on the number of file entries in one collection.
pub const MAX_COLLECTION_ENTRIES: usize = 10_000;
/// Result of walking a directory tree with [`walk_dir`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WalkedCollection {
/// Collected file entries; `walk_dir` sorts them by `path` before returning.
pub entries: Vec<CollectionEntry>,
/// Sum of the byte lengths of the file contents held in `entries`.
pub total_bytes: u64,
/// `Some("index.html")` when a file with exactly that collection path was
/// collected (i.e. it sits directly in the root); `None` otherwise.
pub default_index: Option<String>,
}
/// Errors produced by [`walk_dir`].
#[derive(Debug)]
pub enum WalkError {
/// The given root path exists but is not a directory.
NotADirectory(PathBuf),
/// Adding another file would push the entry count past `cap`.
TooManyEntries { cap: usize },
/// The running byte total `observed` (including the file that tripped the
/// limit) is larger than `cap` bytes.
TooLarge { cap: u64, observed: u64 },
/// A file path could not be made relative to the collection root, or its
/// relative form contained a `..` component. Holds a lossy string rendering
/// of the offending path.
PathEscape(String),
/// An underlying filesystem operation failed.
Io(std::io::Error),
}
impl std::fmt::Display for WalkError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::NotADirectory(p) => write!(f, "{} is not a directory", p.display()),
Self::TooManyEntries { cap } => {
write!(f, "collection exceeds {cap}-entry cap")
}
Self::TooLarge { cap, observed } => write!(
f,
"collection size {observed} bytes exceeds {} bytes ({} MiB) cap",
cap,
cap / (1024 * 1024)
),
Self::PathEscape(p) => write!(f, "path {p:?} escapes the collection root"),
Self::Io(e) => write!(f, "io: {e}"),
}
}
}
// No `source()` override: the trait's default implementation (returning `None`)
// is used. The `Io` variant's inner error text is already included in the
// `Display` output of `WalkError`.
impl std::error::Error for WalkError {}
impl From<std::io::Error> for WalkError {
fn from(e: std::io::Error) -> Self {
Self::Io(e)
}
}
/// Walks the directory tree rooted at `root` and gathers its files into a
/// [`WalkedCollection`].
///
/// The root is canonicalized before the walk, and the resulting entries are
/// sorted by their collection path so the output order does not depend on
/// directory iteration order.
///
/// # Errors
///
/// - [`WalkError::NotADirectory`] when `root` is not a directory.
/// - [`WalkError::Io`] when reading metadata, canonicalizing, or walking fails.
/// - Any other [`WalkError`] reported while collecting files (size cap, entry
///   cap, path escape).
pub fn walk_dir(root: &Path) -> Result<WalkedCollection, WalkError> {
    if !std::fs::metadata(root)?.is_dir() {
        return Err(WalkError::NotADirectory(root.to_path_buf()));
    }

    let base = root.canonicalize()?;

    // Start from an empty result and let the recursive walk fill it in place.
    let mut collection = WalkedCollection {
        entries: Vec::new(),
        total_bytes: 0,
        default_index: None,
    };
    walk_inner(
        &base,
        &base,
        &mut collection.entries,
        &mut collection.total_bytes,
        &mut collection.default_index,
    )?;

    collection
        .entries
        .sort_by(|lhs, rhs| lhs.path.cmp(&rhs.path));
    Ok(collection)
}
/// Recursively collects the regular files under `dir` into `out`.
///
/// `root` is the collection root that entry paths are made relative to, and
/// `dir` is the directory scanned by this call. `total` accumulates the number
/// of bytes collected so far, and `default_index` is set to `"index.html"` the
/// first time a file with exactly that collection path is seen.
///
/// Entries are skipped when their name is not valid UTF-8, when the name starts
/// with `.`, when they are symlinks, or when they are neither a directory nor a
/// regular file. Collection paths use `/` as the separator.
///
/// # Errors
///
/// - [`WalkError::Io`] when a directory, its entries, file metadata, or file
///   contents cannot be read.
/// - [`WalkError::PathEscape`] when a file path cannot be made relative to
///   `root` or contains a `..` component.
/// - [`WalkError::TooLarge`] when the byte total would exceed
///   [`MAX_COLLECTION_BYTES`].
/// - [`WalkError::TooManyEntries`] when another entry would exceed
///   [`MAX_COLLECTION_ENTRIES`].
fn walk_inner(
    root: &Path,
    dir: &Path,
    out: &mut Vec<CollectionEntry>,
    total: &mut u64,
    default_index: &mut Option<String>,
) -> Result<(), WalkError> {
    for ent in std::fs::read_dir(dir)? {
        let ent = ent?;

        // Borrow the name instead of allocating a `String` just to inspect it.
        // Names that are not valid UTF-8 are skipped.
        let file_name = ent.file_name();
        let name = match file_name.to_str() {
            Some(n) => n,
            None => continue,
        };
        if name.starts_with('.') {
            continue;
        }

        let ft = ent.file_type()?;
        if ft.is_symlink() {
            continue;
        }
        let abs = ent.path();
        if ft.is_dir() {
            walk_inner(root, &abs, out, total, default_index)?;
            continue;
        }
        if !ft.is_file() {
            continue;
        }

        let rel = abs
            .strip_prefix(root)
            .map_err(|_| WalkError::PathEscape(abs.to_string_lossy().into_owned()))?;
        if rel.components().any(|c| matches!(c, Component::ParentDir)) {
            return Err(WalkError::PathEscape(rel.to_string_lossy().into_owned()));
        }
        let tar_path: String = rel
            .components()
            .filter_map(|c| match c {
                Component::Normal(s) => s.to_str(),
                _ => None,
            })
            .collect::<Vec<_>>()
            .join("/");

        // Check the size reported by the filesystem BEFORE reading, so a file
        // that would blow the cap is rejected without being loaded into memory.
        let projected_total = total.saturating_add(ent.metadata()?.len());
        if projected_total > MAX_COLLECTION_BYTES {
            return Err(WalkError::TooLarge {
                cap: MAX_COLLECTION_BYTES,
                observed: projected_total,
            });
        }

        // Likewise enforce the entry cap before paying for the read.
        if out.len() >= MAX_COLLECTION_ENTRIES {
            return Err(WalkError::TooManyEntries {
                cap: MAX_COLLECTION_ENTRIES,
            });
        }

        let data = std::fs::read(&abs)?;

        // Re-check with the number of bytes actually read: the file may have
        // changed size since its metadata was queried.
        let new_total = total.saturating_add(data.len() as u64);
        if new_total > MAX_COLLECTION_BYTES {
            return Err(WalkError::TooLarge {
                cap: MAX_COLLECTION_BYTES,
                observed: new_total,
            });
        }
        *total = new_total;

        // Only a file whose full collection path is "index.html" (i.e. one
        // directly in the root) qualifies as the default index.
        if default_index.is_none() && tar_path == "index.html" {
            *default_index = Some(tar_path.clone());
        }
        out.push(CollectionEntry::new(tar_path, data));
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates a temporary directory via `tempfile` with the given name prefix.
    fn tmpdir(prefix: &str) -> tempfile::TempDir {
        tempfile::Builder::new()
            .prefix(prefix)
            .tempdir()
            .expect("tempdir")
    }

    /// Collection paths of `walked`, in the order the entries are stored.
    fn entry_paths(walked: &WalkedCollection) -> Vec<&str> {
        walked
            .entries
            .iter()
            .map(|entry| entry.path.as_str())
            .collect()
    }

    // An empty root yields no entries, zero bytes, and no default index.
    #[test]
    fn walk_empty_dir_returns_empty_collection() {
        let dir = tmpdir("bee-tui-walk-empty");
        let walked = walk_dir(dir.path()).expect("walk ok");
        assert!(entry_paths(&walked).is_empty());
        assert_eq!(walked.total_bytes, 0);
        assert_eq!(walked.default_index, None);
    }

    // Nested files get `/`-joined relative paths, and sizes are summed.
    #[test]
    fn walk_picks_up_files_and_normalises_paths() {
        let dir = tmpdir("bee-tui-walk-paths");
        let assets = dir.path().join("assets");
        fs::create_dir_all(&assets).unwrap();
        fs::write(dir.path().join("index.html"), b"<h1>hi</h1>").unwrap();
        fs::write(assets.join("logo.png"), [0u8; 16]).unwrap();

        let walked = walk_dir(dir.path()).expect("walk ok");
        assert_eq!(entry_paths(&walked), ["assets/logo.png", "index.html"]);
        assert_eq!(walked.total_bytes, 11 + 16);
        assert_eq!(walked.default_index.as_deref(), Some("index.html"));
    }

    // Dot-prefixed files and directories are left out of the collection.
    #[test]
    fn walk_skips_hidden_files_and_dirs() {
        let dir = tmpdir("bee-tui-walk-hidden");
        let git_dir = dir.path().join(".git");
        fs::create_dir_all(&git_dir).unwrap();
        fs::write(git_dir.join("HEAD"), b"x").unwrap();
        fs::write(dir.path().join(".env"), b"x").unwrap();
        fs::write(dir.path().join("visible.txt"), b"y").unwrap();

        let walked = walk_dir(dir.path()).expect("walk ok");
        assert_eq!(entry_paths(&walked), ["visible.txt"]);
    }

    // A symlink pointing outside the root must not pull foreign files in.
    #[test]
    fn walk_does_not_follow_symlinks() {
        let dir = tmpdir("bee-tui-walk-symlinks");
        let outside = tmpdir("bee-tui-walk-outside");
        fs::write(outside.path().join("secret.txt"), b"private").unwrap();
        fs::write(dir.path().join("real.txt"), b"ok").unwrap();
        #[cfg(unix)]
        std::os::unix::fs::symlink(outside.path(), dir.path().join("link")).unwrap();

        let walked = walk_dir(dir.path()).expect("walk ok");
        assert_eq!(entry_paths(&walked), ["real.txt"]);
    }

    // Passing a regular file as the root is rejected up front.
    #[test]
    fn walk_errors_on_non_directory() {
        let dir = tmpdir("bee-tui-walk-notdir");
        let file_path = dir.path().join("file.txt");
        fs::write(&file_path, b"x").unwrap();

        match walk_dir(&file_path) {
            Err(WalkError::NotADirectory(_)) => {}
            other => panic!("expected NotADirectory, got {other:?}"),
        }
    }

    // An `index.html` inside a subdirectory does not count as the default index.
    #[test]
    fn walk_default_index_only_at_root() {
        let dir = tmpdir("bee-tui-walk-nested-index");
        let docs = dir.path().join("docs");
        fs::create_dir_all(&docs).unwrap();
        fs::write(docs.join("index.html"), b"x").unwrap();

        let walked = walk_dir(dir.path()).expect("walk ok");
        assert_eq!(walked.default_index, None);
    }

    // Entries come back sorted by path regardless of creation order.
    #[test]
    fn walk_orders_entries_deterministically() {
        let dir = tmpdir("bee-tui-walk-order");
        for file in ["z.txt", "a.txt", "m.txt"] {
            fs::write(dir.path().join(file), b"x").unwrap();
        }

        let walked = walk_dir(dir.path()).expect("walk ok");
        assert_eq!(entry_paths(&walked), ["a.txt", "m.txt", "z.txt"]);
    }
}