Skip to main content

zagens_runtime_adapters/snapshot/
size.rs

1//! Workspace byte-size estimation before side-git snapshot init.
2//!
3//! A full `git add -A` on multi-GB trees (node_modules, model weights, etc.)
4//! can block for minutes. We walk the workspace with the same ripgrep-style
5//! skip rules as search tools and bail out before touching git when the
6//! on-disk footprint exceeds `[snapshots] max_workspace_gb`.
7
8use std::io;
9use std::path::Path;
10
11use ignore::WalkBuilder;
12
13use crate::tools::workspace_walk::configure_workspace_walk;
14
/// Default workspace size cap, in GB, used when `[snapshots] max_workspace_gb`
/// is unset (matches upstream v0.8.32).
pub const DEFAULT_SNAPSHOT_MAX_WORKSPACE_GB: f64 = 2.0;
17
18/// Returns `true` when the workspace tree exceeds `max_gb` ( gibibyte-style GB:
19/// `max_gb * 1024^3` bytes).
20pub fn workspace_exceeds_size_limit(workspace: &Path, max_gb: f64) -> io::Result<bool> {
21    if max_gb <= 0.0 {
22        return Ok(false);
23    }
24    let max_bytes = (max_gb * 1024.0 * 1024.0 * 1024.0) as u64;
25    let total = estimate_workspace_bytes(workspace, Some(max_bytes.saturating_add(1)))?;
26    Ok(total > max_bytes)
27}
28
29/// Sum file sizes under `workspace`, honoring gitignore + skip-dir rules.
30/// Stops early once `limit_bytes` would be exceeded (pass `None` for full scan).
31pub fn estimate_workspace_bytes(workspace: &Path, limit_bytes: Option<u64>) -> io::Result<u64> {
32    if workspace.is_file() {
33        let len = workspace.metadata()?.len();
34        return Ok(len);
35    }
36    if !workspace.is_dir() {
37        return Ok(0);
38    }
39
40    let mut builder = WalkBuilder::new(workspace);
41    configure_workspace_walk(&mut builder, true);
42    let walker = builder.build();
43
44    let mut total = 0u64;
45    for entry in walker.flatten() {
46        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
47            continue;
48        }
49        let len = entry.metadata().map(|m| m.len()).unwrap_or(0);
50        total = total.saturating_add(len);
51        if limit_bytes.is_some_and(|limit| total > limit) {
52            break;
53        }
54    }
55    Ok(total)
56}
57
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// A 4 KiB payload under `node_modules` must not count toward the estimate.
    #[test]
    fn estimate_respects_limit_and_skips_heavy_dir_names() {
        let workspace = tempdir().expect("tempdir");
        let heavy_dir = workspace.path().join("node_modules");
        fs::create_dir_all(&heavy_dir).expect("mkdir");
        fs::write(heavy_dir.join("huge.bin"), vec![0u8; 4096]).expect("write");
        fs::write(workspace.path().join("small.txt"), "hello").expect("write");

        let total = estimate_workspace_bytes(workspace.path(), None).expect("estimate");
        assert!(
            total < 4096,
            "node_modules should be skipped, got {total} bytes"
        );
    }

    /// Two 512-byte files together exceed a ~536-byte cap but not a 10 GB one.
    #[test]
    fn exceeds_limit_when_small_files_add_up() {
        let workspace = tempdir().expect("tempdir");
        for name in ["a.bin", "b.bin"] {
            fs::write(workspace.path().join(name), vec![0u8; 512]).expect("write");
        }

        // 0.0000005 * 1024^3 is roughly 536 bytes.
        let over_tiny_cap =
            workspace_exceeds_size_limit(workspace.path(), 0.0000005).expect("check");
        assert!(over_tiny_cap);

        let over_large_cap = workspace_exceeds_size_limit(workspace.path(), 10.0).expect("check");
        assert!(!over_large_cap);
    }
}