Skip to main content

harmont_cli/orchestrator/
source.rs

1//! Source-archive helpers shared between remote and local run modes.
2//!
3//! Walks a directory respecting `.gitignore` and produces a `.tar.gz`.
4//! Local mode pipes the result into a chain-root container's stdin so
5//! steps see the user's tree under `/workspace`. Remote mode hashes the
6//! same bytes and ships them as a base64 blob in the build request.
7
8use std::io::Write;
9use std::path::Path;
10
11use anyhow::{Context, Result};
12use flate2::Compression;
13use flate2::write::GzEncoder;
14use ignore::WalkBuilder;
15use tar::Builder as TarBuilder;
16
17/// Build a tar.gz archive of `source_dir` (respecting .gitignore) and
18/// return the resulting bytes. Excludes the literal `.git` directory.
19///
20/// # Errors
21///
22/// Returns the same errors as [`write_archive`].
23pub fn build_archive_bytes(source_dir: &Path) -> Result<Vec<u8>> {
24    let mut buf: Vec<u8> = Vec::new();
25    write_archive(source_dir, &mut buf)?;
26    Ok(buf)
27}
28
29/// Write a tar.gz archive of `source_dir` into `w`.
30///
31/// # Errors
32///
33/// Returns an error if walking the source tree surfaces an I/O or
34/// permission error, if a file cannot be appended to the archive, or
35/// if the gzip stream cannot be finalised on the destination writer.
36pub fn write_archive(source_dir: &Path, w: impl Write) -> Result<()> {
37    let encoder = GzEncoder::new(w, Compression::fast());
38    let mut archive = TarBuilder::new(encoder);
39
40    // `WalkBuilder` defaults to `hidden(true)`, which silently drops
41    // every dotfile — `.eslintrc.json`, `.ocamlformat`, `.gitignore`
42    // overrides per-example, etc. We need those in the archive shipped
43    // to the container, so flip `hidden(false)`. The literal `.git`
44    // and `.harmont` directories are still excluded via `filter_entry`
45    // — `.git` is repository bookkeeping; `.harmont` holds the
46    // pipeline-render entry point (already executed host-side) and
47    // its `__pycache__`, both of which would otherwise leak into the
48    // workspace and trip up project-level tools (e.g. ruff format
49    // walking every `.py` file under /workspace).
50    let walker = WalkBuilder::new(source_dir)
51        .hidden(false)
52        .git_ignore(true)
53        .git_global(true)
54        .git_exclude(true)
55        .filter_entry(|entry: &ignore::DirEntry| {
56            let name = entry.file_name().to_string_lossy();
57            name != ".git" && name != ".harmont"
58        })
59        .build();
60
61    for entry in walker {
62        let entry: ignore::DirEntry = entry.context("walking source directory")?;
63        let entry_path = entry.path();
64        if entry_path.is_file() {
65            let relative = entry_path.strip_prefix(source_dir).unwrap_or(entry_path);
66            archive
67                .append_path_with_name(entry_path, relative)
68                .with_context(|| format!("adding {}", entry_path.display()))?;
69        }
70    }
71    archive
72        .into_inner()
73        .context("finishing gzip stream")?
74        .finish()
75        .context("finalizing gzip")?;
76    Ok(())
77}
78
#[cfg(test)]
// Tests are allowed to panic on unexpected failures; that is how they fail.
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use std::fs;
    use std::io::Read;
    use std::path::{Path, PathBuf};

    use flate2::read::GzDecoder;

    use super::*;

    /// Archive `root`, inflate the result, and list the path of every
    /// entry stored in the tarball.
    fn archived_paths(root: &Path) -> Vec<PathBuf> {
        let bytes = build_archive_bytes(root).unwrap();
        let mut tar_bytes = Vec::new();
        GzDecoder::new(&bytes[..])
            .read_to_end(&mut tar_bytes)
            .unwrap();
        let mut ar = tar::Archive::new(&tar_bytes[..]);
        ar.entries()
            .unwrap()
            .map(|e| e.unwrap().path().unwrap().into_owned())
            .collect()
    }

    /// True when any component of `path` is exactly `name`. Comparing
    /// whole components (rather than a string prefix) keeps siblings such
    /// as `.gitignore` from being mistaken for `.git`.
    fn has_component(path: &Path, name: &str) -> bool {
        path.components().any(|c| c.as_os_str() == name)
    }

    #[test]
    fn build_archive_emits_nonempty_gzip_for_simple_tree() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(tmp.path().join("hello.txt"), b"hi").unwrap();
        let bytes = build_archive_bytes(tmp.path()).unwrap();
        // gzip magic 0x1f 0x8b
        assert!(bytes.len() > 2);
        assert_eq!(&bytes[..2], &[0x1f, 0x8b]);
    }

    #[test]
    fn build_archive_skips_dot_git() {
        let tmp = tempfile::tempdir().unwrap();
        fs::create_dir(tmp.path().join(".git")).unwrap();
        fs::write(tmp.path().join(".git/HEAD"), b"ref: refs/heads/main").unwrap();
        fs::write(tmp.path().join("kept.txt"), b"k").unwrap();

        let paths = archived_paths(tmp.path());
        assert!(paths.contains(&PathBuf::from("kept.txt")), "got: {paths:?}");
        assert!(
            !paths.iter().any(|p| has_component(p, ".git")),
            "got: {paths:?}"
        );
    }

    #[test]
    fn build_archive_skips_dot_harmont() {
        let tmp = tempfile::tempdir().unwrap();
        fs::create_dir(tmp.path().join(".harmont")).unwrap();
        fs::write(tmp.path().join(".harmont/pipeline.py"), b"pass").unwrap();
        fs::write(tmp.path().join("kept.txt"), b"k").unwrap();

        let paths = archived_paths(tmp.path());
        assert!(paths.contains(&PathBuf::from("kept.txt")), "got: {paths:?}");
        assert!(
            !paths.iter().any(|p| has_component(p, ".harmont")),
            "got: {paths:?}"
        );
    }

    #[test]
    fn build_archive_keeps_dotfiles() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(tmp.path().join(".eslintrc.json"), b"{}").unwrap();

        let paths = archived_paths(tmp.path());
        assert!(
            paths.contains(&PathBuf::from(".eslintrc.json")),
            "got: {paths:?}"
        );
    }
}