Skip to main content

bee_tui/
uploads.rs

1//! Directory walker for `:upload-collection`. Recursively reads a
2//! local directory and produces the `Vec<CollectionEntry>` that
3//! `bee.file().upload_collection_entries(...)` consumes.
4//!
5//! ## Why a separate module
6//!
7//! `:upload-file` is one `tokio::fs::read` away from a working
8//! upload — it doesn't need its own module. Collections are
9//! different: traversal rules (skip hidden, skip symlinks),
10//! caps (size + entry count), and tar-friendly path normalisation
11//! all want unit tests against synthetic on-disk fixtures.
//! Keeping the walker pure (no Bee API, no tokio runtime) makes
//! those rules straightforward to test.
14//!
15//! ## Path normalisation
16//!
17//! Tar entries within a collection use forward-slash separators
18//! regardless of host OS — Bee resolves them to manifest forks
19//! literally. We strip the input-dir prefix from each walked path,
20//! convert backslashes to forward slashes (Windows safety), and
//! reject any path that escapes the root via `..` (defense in
//! depth — `std::fs::read_dir` shouldn't surface such paths when
//! symlinks are skipped, but we keep the explicit check).
24
25use std::path::{Component, Path, PathBuf};
26
27use bee::file::CollectionEntry;
28
/// Byte budget for one `:upload-collection` invocation: 256 MiB
/// across all packed files. This is the same ceiling `:upload-file`
/// uses and exists to keep the cockpit's event loop responsive on
/// operator-typical hardware. Larger payloads should be uploaded
/// out of process with `swarm-cli`.
pub const MAX_COLLECTION_BYTES: u64 = 256 << 20;

/// Maximum number of files a single collection may contain. Since
/// the tar is built in memory, 10k files is already extreme for a
/// TUI verb, yet the limit leaves ample room for a typical
/// static-site `dist/` directory.
pub const MAX_COLLECTION_ENTRIES: usize = 10_000;
40
/// Result of a directory walk, as produced by [`walk_dir`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WalkedCollection {
    /// One entry per file picked up by the walk, sorted by its
    /// forward-slash relative path.
    pub entries: Vec<CollectionEntry>,
    /// Sum of `entries[*].data.len()`.
    pub total_bytes: u64,
    /// Path of an `index.html` at the *collection root* (depth 1)
    /// when present — surfaced so the verb can auto-set
    /// `CollectionUploadOptions::index_document`. `None` when no
    /// such file exists; the operator can still pass an explicit
    /// `--index <path>` flag in a future iteration.
    pub default_index: Option<String>,
}
54
/// Everything that can go wrong while walking a directory for
/// `:upload-collection`. The [`std::fmt::Display`] output is meant
/// to be shown to the operator as-is.
#[derive(Debug)]
pub enum WalkError {
    /// The given root path exists but is not a directory.
    NotADirectory(PathBuf),
    /// The walk found more files than the entry cap (`cap`) allows.
    TooManyEntries { cap: usize },
    /// The running byte total (`observed`) went past the size cap
    /// (`cap`).
    TooLarge { cap: u64, observed: u64 },
    /// A walked path was not located under the root, or its
    /// relative form contained a `..` component.
    PathEscape(String),
    /// An underlying filesystem operation failed.
    Io(std::io::Error),
}

impl std::fmt::Display for WalkError {
    /// Renders a single-line, operator-facing description.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::NotADirectory(path) => {
                write!(f, "{} is not a directory", path.display())
            }
            Self::TooManyEntries { cap } => write!(f, "collection exceeds {cap}-entry cap"),
            Self::TooLarge { cap, observed } => {
                // Show the cap in both raw bytes and whole MiB.
                let cap_mib = cap / (1024 * 1024);
                write!(
                    f,
                    "collection size {observed} bytes exceeds {} bytes ({} MiB) cap",
                    cap, cap_mib
                )
            }
            Self::PathEscape(p) => {
                write!(f, "path {p:?} escapes the collection root")
            }
            Self::Io(e) => write!(f, "io: {e}"),
        }
    }
}

// The wrapped I/O error is already spelled out in `Display`, so no
// separate `source()` is exposed.
impl std::error::Error for WalkError {}

impl From<std::io::Error> for WalkError {
    /// Lets `?` lift filesystem errors into [`WalkError::Io`].
    fn from(err: std::io::Error) -> Self {
        WalkError::Io(err)
    }
}
90
91/// Walk `root` recursively, returning every regular file as a
92/// [`CollectionEntry`]. Hidden files / directories (any path
93/// component starting with `.`) and symlinks are skipped.
94///
95/// Pure (no Bee API). Errors are returned as [`WalkError`] so the
96/// caller can format an operator-facing message.
97pub fn walk_dir(root: &Path) -> Result<WalkedCollection, WalkError> {
98    let meta = std::fs::metadata(root)?;
99    if !meta.is_dir() {
100        return Err(WalkError::NotADirectory(root.to_path_buf()));
101    }
102    let root_canonical = root.canonicalize()?;
103
104    let mut entries: Vec<CollectionEntry> = Vec::new();
105    let mut total_bytes: u64 = 0;
106    let mut default_index: Option<String> = None;
107
108    walk_inner(
109        &root_canonical,
110        &root_canonical,
111        &mut entries,
112        &mut total_bytes,
113        &mut default_index,
114    )?;
115
116    // Stable, deterministic order so tar output and the resulting
117    // Swarm reference are reproducible across runs.
118    entries.sort_by(|a, b| a.path.cmp(&b.path));
119
120    Ok(WalkedCollection {
121        entries,
122        total_bytes,
123        default_index,
124    })
125}
126
127fn walk_inner(
128    root: &Path,
129    dir: &Path,
130    out: &mut Vec<CollectionEntry>,
131    total: &mut u64,
132    default_index: &mut Option<String>,
133) -> Result<(), WalkError> {
134    for ent in std::fs::read_dir(dir)? {
135        let ent = ent?;
136        let name = match ent.file_name().to_str().map(str::to_string) {
137            Some(n) => n,
138            None => continue, // non-UTF-8 names — skip silently
139        };
140        if name.starts_with('.') {
141            continue;
142        }
143        let ft = ent.file_type()?;
144        if ft.is_symlink() {
145            continue; // safety — don't follow symlinks out of root
146        }
147        let abs = ent.path();
148        if ft.is_dir() {
149            walk_inner(root, &abs, out, total, default_index)?;
150            continue;
151        }
152        if !ft.is_file() {
153            continue;
154        }
155        let rel = abs
156            .strip_prefix(root)
157            .map_err(|_| WalkError::PathEscape(abs.to_string_lossy().to_string()))?;
158        // Reject any `..` traversal in the relative path.
159        if rel.components().any(|c| matches!(c, Component::ParentDir)) {
160            return Err(WalkError::PathEscape(rel.to_string_lossy().to_string()));
161        }
162        // Tar paths use forward slashes everywhere.
163        let tar_path: String = rel
164            .components()
165            .filter_map(|c| match c {
166                Component::Normal(s) => s.to_str(),
167                _ => None,
168            })
169            .collect::<Vec<_>>()
170            .join("/");
171
172        let data = std::fs::read(&abs)?;
173        let new_total = total.saturating_add(data.len() as u64);
174        if new_total > MAX_COLLECTION_BYTES {
175            return Err(WalkError::TooLarge {
176                cap: MAX_COLLECTION_BYTES,
177                observed: new_total,
178            });
179        }
180        *total = new_total;
181
182        if out.len() + 1 > MAX_COLLECTION_ENTRIES {
183            return Err(WalkError::TooManyEntries {
184                cap: MAX_COLLECTION_ENTRIES,
185            });
186        }
187
188        if default_index.is_none() && tar_path == "index.html" {
189            *default_index = Some(tar_path.clone());
190        }
191
192        out.push(CollectionEntry::new(tar_path, data));
193    }
194    Ok(())
195}
196
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates a temporary directory whose name starts with
    /// `prefix`; panics if it cannot be created.
    fn tmpdir(prefix: &str) -> tempfile::TempDir {
        tempfile::Builder::new()
            .prefix(prefix)
            .tempdir()
            .expect("tempdir")
    }

    /// Relative paths of all walked entries, in result order.
    fn walked_paths(walked: &WalkedCollection) -> Vec<&str> {
        walked
            .entries
            .iter()
            .map(|entry| entry.path.as_str())
            .collect()
    }

    #[test]
    fn walk_empty_dir_returns_empty_collection() {
        let scratch = tmpdir("bee-tui-walk-empty");
        let walked = walk_dir(scratch.path()).expect("walk ok");
        assert_eq!(walked.entries.len(), 0);
        assert_eq!(walked.total_bytes, 0);
        assert_eq!(walked.default_index, None);
    }

    #[test]
    fn walk_picks_up_files_and_normalises_paths() {
        let scratch = tmpdir("bee-tui-walk-paths");
        let root = scratch.path();
        let assets = root.join("assets");
        fs::create_dir_all(&assets).unwrap();
        fs::write(root.join("index.html"), b"<h1>hi</h1>").unwrap();
        fs::write(assets.join("logo.png"), [0u8; 16]).unwrap();

        let walked = walk_dir(root).expect("walk ok");
        assert_eq!(walked_paths(&walked), ["assets/logo.png", "index.html"]);
        assert_eq!(walked.total_bytes, 11 + 16);
        assert_eq!(walked.default_index.as_deref(), Some("index.html"));
    }

    #[test]
    fn walk_skips_hidden_files_and_dirs() {
        let scratch = tmpdir("bee-tui-walk-hidden");
        let root = scratch.path();
        let git_dir = root.join(".git");
        fs::create_dir_all(&git_dir).unwrap();
        fs::write(git_dir.join("HEAD"), b"x").unwrap();
        fs::write(root.join(".env"), b"x").unwrap();
        fs::write(root.join("visible.txt"), b"y").unwrap();

        let walked = walk_dir(root).expect("walk ok");
        assert_eq!(walked_paths(&walked), ["visible.txt"]);
    }

    #[test]
    fn walk_does_not_follow_symlinks() {
        let scratch = tmpdir("bee-tui-walk-symlinks");
        let outside = tmpdir("bee-tui-walk-outside");
        fs::write(outside.path().join("secret.txt"), b"private").unwrap();
        fs::write(scratch.path().join("real.txt"), b"ok").unwrap();
        // The link is only created on Unix; elsewhere the test
        // still checks that the real file is the sole entry.
        #[cfg(unix)]
        std::os::unix::fs::symlink(outside.path(), scratch.path().join("link")).unwrap();

        let walked = walk_dir(scratch.path()).expect("walk ok");
        assert_eq!(walked_paths(&walked), ["real.txt"]);
    }

    #[test]
    fn walk_errors_on_non_directory() {
        let scratch = tmpdir("bee-tui-walk-notdir");
        let file = scratch.path().join("file.txt");
        fs::write(&file, b"x").unwrap();

        let other = walk_dir(&file);
        if !matches!(other, Err(WalkError::NotADirectory(_))) {
            panic!("expected NotADirectory, got {other:?}");
        }
    }

    #[test]
    fn walk_default_index_only_at_root() {
        // An index.html that lives in a subdirectory must not be
        // picked as the collection's default index.
        let scratch = tmpdir("bee-tui-walk-nested-index");
        let docs = scratch.path().join("docs");
        fs::create_dir_all(&docs).unwrap();
        fs::write(docs.join("index.html"), b"x").unwrap();

        let walked = walk_dir(scratch.path()).expect("walk ok");
        assert_eq!(walked.default_index, None);
    }

    #[test]
    fn walk_orders_entries_deterministically() {
        let scratch = tmpdir("bee-tui-walk-order");
        // Written out of alphabetical order on purpose.
        for name in ["z.txt", "a.txt", "m.txt"] {
            fs::write(scratch.path().join(name), b"x").unwrap();
        }

        let walked = walk_dir(scratch.path()).expect("walk ok");
        assert_eq!(walked_paths(&walked), ["a.txt", "m.txt", "z.txt"]);
    }
}