1use std::path::{Component, Path, PathBuf};
26
27use bee::file::CollectionEntry;
28
/// Upper bound on the combined size, in bytes, of all files in one collection (256 MiB).
///
/// The directory walk fails with [`WalkError::TooLarge`] once the running total of
/// file bytes exceeds this value.
pub const MAX_COLLECTION_BYTES: u64 = 256 * 1024 * 1024;
34
/// Upper bound on the number of files in one collection.
///
/// The directory walk fails with [`WalkError::TooManyEntries`] when adding another
/// file would push the entry count past this value.
pub const MAX_COLLECTION_ENTRIES: usize = 10_000;
40
/// Outcome of a successful [`walk_dir`] call.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WalkedCollection {
    /// Files found under the root, sorted by their `/`-joined path relative to the root.
    pub entries: Vec<CollectionEntry>,
    /// Sum of the byte lengths of every file that was read into `entries`.
    pub total_bytes: u64,
    /// `Some("index.html")` when a file with exactly that relative path (i.e. directly
    /// in the root) was collected; `None` otherwise.
    pub default_index: Option<String>,
}
54
/// Reasons a directory walk can fail.
#[derive(Debug)]
pub enum WalkError {
    /// The path handed to `walk_dir` exists but is not a directory.
    NotADirectory(PathBuf),
    /// Adding another file would push the entry count past `cap`.
    TooManyEntries { cap: usize },
    /// The running byte total (`observed`) went past `cap`.
    TooLarge { cap: u64, observed: u64 },
    /// A file's path could not be made relative to the collection root, or its
    /// relative form contained a `..` component. Carries the offending path.
    PathEscape(String),
    /// An underlying filesystem operation failed.
    Io(std::io::Error),
}
63
64impl std::fmt::Display for WalkError {
65 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66 match self {
67 Self::NotADirectory(p) => write!(f, "{} is not a directory", p.display()),
68 Self::TooManyEntries { cap } => {
69 write!(f, "collection exceeds {cap}-entry cap")
70 }
71 Self::TooLarge { cap, observed } => write!(
72 f,
73 "collection size {observed} bytes exceeds {} bytes ({} MiB) cap",
74 cap,
75 cap / (1024 * 1024)
76 ),
77 Self::PathEscape(p) => write!(f, "path {p:?} escapes the collection root"),
78 Self::Io(e) => write!(f, "io: {e}"),
79 }
80 }
81}
82
// Marks `WalkError` as a standard error type; every trait method keeps its default,
// so no `source()` is exposed (the `Io` variant's inner error is printed by `Display`).
impl std::error::Error for WalkError {}
84
85impl From<std::io::Error> for WalkError {
86 fn from(e: std::io::Error) -> Self {
87 Self::Io(e)
88 }
89}
90
91pub fn walk_dir(root: &Path) -> Result<WalkedCollection, WalkError> {
98 let meta = std::fs::metadata(root)?;
99 if !meta.is_dir() {
100 return Err(WalkError::NotADirectory(root.to_path_buf()));
101 }
102 let root_canonical = root.canonicalize()?;
103
104 let mut entries: Vec<CollectionEntry> = Vec::new();
105 let mut total_bytes: u64 = 0;
106 let mut default_index: Option<String> = None;
107
108 walk_inner(
109 &root_canonical,
110 &root_canonical,
111 &mut entries,
112 &mut total_bytes,
113 &mut default_index,
114 )?;
115
116 entries.sort_by(|a, b| a.path.cmp(&b.path));
119
120 Ok(WalkedCollection {
121 entries,
122 total_bytes,
123 default_index,
124 })
125}
126
127fn walk_inner(
128 root: &Path,
129 dir: &Path,
130 out: &mut Vec<CollectionEntry>,
131 total: &mut u64,
132 default_index: &mut Option<String>,
133) -> Result<(), WalkError> {
134 for ent in std::fs::read_dir(dir)? {
135 let ent = ent?;
136 let name = match ent.file_name().to_str().map(str::to_string) {
137 Some(n) => n,
138 None => continue, };
140 if name.starts_with('.') {
141 continue;
142 }
143 let ft = ent.file_type()?;
144 if ft.is_symlink() {
145 continue; }
147 let abs = ent.path();
148 if ft.is_dir() {
149 walk_inner(root, &abs, out, total, default_index)?;
150 continue;
151 }
152 if !ft.is_file() {
153 continue;
154 }
155 let rel = abs
156 .strip_prefix(root)
157 .map_err(|_| WalkError::PathEscape(abs.to_string_lossy().to_string()))?;
158 if rel.components().any(|c| matches!(c, Component::ParentDir)) {
160 return Err(WalkError::PathEscape(rel.to_string_lossy().to_string()));
161 }
162 let tar_path: String = rel
164 .components()
165 .filter_map(|c| match c {
166 Component::Normal(s) => s.to_str(),
167 _ => None,
168 })
169 .collect::<Vec<_>>()
170 .join("/");
171
172 let data = std::fs::read(&abs)?;
173 let new_total = total.saturating_add(data.len() as u64);
174 if new_total > MAX_COLLECTION_BYTES {
175 return Err(WalkError::TooLarge {
176 cap: MAX_COLLECTION_BYTES,
177 observed: new_total,
178 });
179 }
180 *total = new_total;
181
182 if out.len() + 1 > MAX_COLLECTION_ENTRIES {
183 return Err(WalkError::TooManyEntries {
184 cap: MAX_COLLECTION_ENTRIES,
185 });
186 }
187
188 if default_index.is_none() && tar_path == "index.html" {
189 *default_index = Some(tar_path.clone());
190 }
191
192 out.push(CollectionEntry::new(tar_path, data));
193 }
194 Ok(())
195}
196
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates a fresh temporary directory whose name starts with `prefix`.
    fn tmpdir(prefix: &str) -> tempfile::TempDir {
        tempfile::Builder::new().prefix(prefix).tempdir().expect("tempdir")
    }

    /// Walks the temporary directory and unwraps the result.
    fn walk(dir: &tempfile::TempDir) -> WalkedCollection {
        walk_dir(dir.path()).expect("walk ok")
    }

    /// Returns the entry paths of a walk result, in stored order.
    fn rel_paths(walked: &WalkedCollection) -> Vec<&str> {
        walked.entries.iter().map(|e| e.path.as_str()).collect()
    }

    #[test]
    fn walk_empty_dir_returns_empty_collection() {
        let dir = tmpdir("bee-tui-walk-empty");
        let walked = walk(&dir);
        assert!(walked.entries.is_empty());
        assert_eq!(walked.total_bytes, 0);
        assert!(walked.default_index.is_none());
    }

    #[test]
    fn walk_picks_up_files_and_normalises_paths() {
        let dir = tmpdir("bee-tui-walk-paths");
        let assets = dir.path().join("assets");
        fs::create_dir_all(&assets).unwrap();
        fs::write(dir.path().join("index.html"), b"<h1>hi</h1>").unwrap();
        fs::write(assets.join("logo.png"), [0u8; 16]).unwrap();

        let walked = walk(&dir);
        assert_eq!(rel_paths(&walked), vec!["assets/logo.png", "index.html"]);
        assert_eq!(walked.total_bytes, 11 + 16);
        assert_eq!(walked.default_index.as_deref(), Some("index.html"));
    }

    #[test]
    fn walk_skips_hidden_files_and_dirs() {
        let dir = tmpdir("bee-tui-walk-hidden");
        let git_dir = dir.path().join(".git");
        fs::create_dir_all(&git_dir).unwrap();
        fs::write(git_dir.join("HEAD"), b"x").unwrap();
        fs::write(dir.path().join(".env"), b"x").unwrap();
        fs::write(dir.path().join("visible.txt"), b"y").unwrap();

        let walked = walk(&dir);
        assert_eq!(rel_paths(&walked), vec!["visible.txt"]);
    }

    #[test]
    fn walk_does_not_follow_symlinks() {
        let dir = tmpdir("bee-tui-walk-symlinks");
        let outside = tmpdir("bee-tui-walk-outside");
        fs::write(outside.path().join("secret.txt"), b"private").unwrap();
        fs::write(dir.path().join("real.txt"), b"ok").unwrap();
        // The link is only created on Unix; elsewhere the walk just sees `real.txt`.
        #[cfg(unix)]
        std::os::unix::fs::symlink(outside.path(), dir.path().join("link")).unwrap();

        let walked = walk(&dir);
        assert_eq!(rel_paths(&walked), vec!["real.txt"]);
    }

    #[test]
    fn walk_errors_on_non_directory() {
        let dir = tmpdir("bee-tui-walk-notdir");
        let file = dir.path().join("file.txt");
        fs::write(&file, b"x").unwrap();

        let other = walk_dir(&file);
        if !matches!(other, Err(WalkError::NotADirectory(_))) {
            panic!("expected NotADirectory, got {other:?}");
        }
    }

    #[test]
    fn walk_default_index_only_at_root() {
        let dir = tmpdir("bee-tui-walk-nested-index");
        let docs = dir.path().join("docs");
        fs::create_dir_all(&docs).unwrap();
        fs::write(docs.join("index.html"), b"x").unwrap();

        let walked = walk(&dir);
        assert!(walked.default_index.is_none());
    }

    #[test]
    fn walk_orders_entries_deterministically() {
        let dir = tmpdir("bee-tui-walk-order");
        for name in ["z.txt", "a.txt", "m.txt"] {
            fs::write(dir.path().join(name), b"x").unwrap();
        }

        let walked = walk(&dir);
        assert_eq!(rel_paths(&walked), vec!["a.txt", "m.txt", "z.txt"]);
    }
}