Skip to main content

harn_cli/commands/run/
harnpack.rs

1//! `harn run <bundle.harnpack>` — verify the embedded OpenTrustGraph
2//! signature, replay the archive into the content-addressed pack cache,
3//! and execute the bundled entrypoint.
4//!
5//! See issue #1784 (epic #1779). The verify path reuses the helpers
6//! shipped with E6.1/E6.3 (`workflow_bundle.rs`) so signing and
7//! verification share the same canonical-hash code path.
8
9use std::fmt::Write;
10use std::fs;
11use std::io;
12use std::path::{Component, Path, PathBuf};
13
14use harn_vm::bytecode_cache;
15use harn_vm::orchestration::{
16    read_harnpack, verify_workflow_bundle_signature, workflow_bundle_hash, HarnpackEntry,
17    WorkflowBundle, WorkflowBundleError,
18};
19
/// Zstandard magic prefix. `.harnpack` archives are zstd-compressed tar
/// streams, so the on-disk byte signature is the zstd frame header.
///
/// [`looks_like_harnpack`] compares the first four bytes of a candidate
/// file against this value when the file extension is not conclusive.
const ZSTD_MAGIC: &[u8; 4] = &[0x28, 0xb5, 0x2f, 0xfd];
23
/// Options for [`prepare_harnpack`].
///
/// The derived `Default` leaves both flags `false`: unsigned bundles
/// are refused and no verify-only mode is requested.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct HarnpackRunOptions {
    /// Run the pack even when it carries no Ed25519 signature.
    pub allow_unsigned: bool,
    /// Verify-only mode: stop after the cache replay and emit a
    /// `pack_run` event without executing the entrypoint.
    pub dry_run_verify: bool,
}
33
/// Outcome of [`prepare_harnpack`]. The CLI surface uses this to (a)
/// emit the `pack_run` event before the run starts, (b) decide whether
/// to short-circuit on `--dry-run-verify`, and (c) hand off the unpacked
/// entrypoint path to the existing source-execution code path.
#[derive(Debug)]
pub struct PreparedHarnpack {
    /// Digest returned by `workflow_bundle_hash` for the manifest and
    /// its contents; its sanitized form names the cache directory.
    pub bundle_hash: String,
    /// `true` when the manifest carried a signature and verification
    /// succeeded; `false` for an unsigned bundle that was admitted via
    /// `allow_unsigned`.
    pub signature_verified: bool,
    /// Key identifier cloned from the manifest signature, when the
    /// signature provides one; always `None` for unsigned bundles.
    pub key_id: Option<String>,
    /// `true` when the cache slot already held a matching
    /// `harnpack.json`, so the archive was not replayed again.
    pub cache_hit: bool,
    /// Content-addressed directory under the pack cache that holds the
    /// unpacked bundle.
    pub cache_dir: PathBuf,
    /// Location of the manifest entrypoint under `cache_dir/sources`.
    pub entrypoint_path: PathBuf,
    /// Manifest read from the archive.
    pub manifest: WorkflowBundle,
}
48
/// Failure raised while preparing a `.harnpack` bundle for execution.
///
/// `code` is a stable, machine-readable discriminator (for example
/// `"harnpack.unsigned"`); `message` is the human-readable text, and
/// is exactly what the `Display` implementation prints.
#[derive(Debug)]
pub struct HarnpackError {
    pub code: &'static str,
    pub message: String,
}

impl HarnpackError {
    /// Build an error from a static code and anything convertible into
    /// an owned message.
    fn new(code: &'static str, message: impl Into<String>) -> Self {
        let message = message.into();
        Self { code, message }
    }
}

impl std::fmt::Display for HarnpackError {
    /// Emits the message verbatim; the code is not part of the rendered
    /// text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for HarnpackError {}
71
72impl From<WorkflowBundleError> for HarnpackError {
73    fn from(error: WorkflowBundleError) -> Self {
74        Self::new("harnpack.archive", error.message)
75    }
76}
77
78/// Detect whether `path` references a `.harnpack` bundle by extension
79/// or zstd magic header. The magic-header path keeps detection robust
80/// for renamed bundles (`./bundle` without extension) which is the
81/// failure mode that bit us when users curl bundles without `-o`.
82pub fn looks_like_harnpack(path: &Path) -> bool {
83    if path.extension().and_then(|ext| ext.to_str()) == Some("harnpack") {
84        return true;
85    }
86    match fs::File::open(path) {
87        Ok(mut file) => {
88            use std::io::Read;
89            let mut buf = [0u8; 4];
90            file.read_exact(&mut buf).is_ok() && &buf == ZSTD_MAGIC
91        }
92        Err(_) => false,
93    }
94}
95
96/// Verify the bundle at `path`, replay it into the content-addressed
97/// pack cache, and return the unpacked entrypoint to execute.
98///
99/// Errors map to user-facing exit-code-1 messages on the CLI; the
100/// [`HarnpackError::code`] discriminates failure modes for JSON
101/// callers and tests.
102pub fn prepare_harnpack<W: Write>(
103    path: &Path,
104    options: &HarnpackRunOptions,
105    stderr: &mut W,
106) -> Result<PreparedHarnpack, HarnpackError> {
107    let bytes = fs::read(path).map_err(|err| {
108        HarnpackError::new(
109            "harnpack.read_failed",
110            format!("failed to read {}: {err}", path.display()),
111        )
112    })?;
113    let archive = read_harnpack(&bytes)?;
114    let manifest = archive.manifest;
115    let contents = archive.contents;
116
117    let (signature_verified, key_id) = match manifest.signature.as_ref() {
118        Some(signature) => {
119            verify_workflow_bundle_signature(&manifest, &contents)?;
120            (true, signature.key_id.clone())
121        }
122        None => {
123            if !options.allow_unsigned {
124                return Err(HarnpackError::new(
125                    "harnpack.unsigned",
126                    format!(
127                        "refusing to run unsigned bundle {} \
128                         (re-run with --allow-unsigned to override)",
129                        path.display()
130                    ),
131                ));
132            }
133            (false, None)
134        }
135    };
136
137    check_harn_version_compat(&manifest.harn_version, stderr)?;
138    let bundle_hash = workflow_bundle_hash(&manifest, &contents)?;
139    let cache_dir = bytecode_cache::packs_cache_dir().join(sanitize_bundle_hash(&bundle_hash));
140    let cache_hit = manifest_already_replayed(&cache_dir, &manifest)?;
141    if !cache_hit {
142        replay_archive(&cache_dir, &manifest, &contents)?;
143    }
144
145    let entrypoint_path = cache_dir.join("sources").join(&manifest.entrypoint);
146    if !entrypoint_path.exists() {
147        return Err(HarnpackError::new(
148            "harnpack.missing_entrypoint",
149            format!(
150                "manifest entrypoint {} not present in unpacked bundle at {}",
151                manifest.entrypoint.display(),
152                entrypoint_path.display()
153            ),
154        ));
155    }
156
157    Ok(PreparedHarnpack {
158        bundle_hash,
159        signature_verified,
160        key_id,
161        cache_hit,
162        cache_dir,
163        entrypoint_path,
164        manifest,
165    })
166}
167
/// Turn a `blake3:<hex>` digest into a string usable as a single
/// directory name. Every `:` becomes `_` because `:` is illegal in
/// some path layers (Windows, `tar` member names); the algorithm
/// prefix is kept for forensic readability. All other characters pass
/// through untouched.
fn sanitize_bundle_hash(hash: &str) -> String {
    hash.chars()
        .map(|ch| if ch == ':' { '_' } else { ch })
        .collect()
}
175
176/// `harn_version` compatibility check: refuse on a major or minor
177/// mismatch, warn on a patch mismatch. The contract is documented on
178/// issue #1784.
179fn check_harn_version_compat<W: Write>(
180    bundle_version: &str,
181    stderr: &mut W,
182) -> Result<(), HarnpackError> {
183    let current_version = env!("CARGO_PKG_VERSION");
184    if bundle_version == current_version {
185        return Ok(());
186    }
187    let (Some(bundle), Some(current)) = (
188        parse_semver_triplet(bundle_version),
189        parse_semver_triplet(current_version),
190    ) else {
191        let _ = writeln!(
192            stderr,
193            "warning: harnpack harn_version {bundle_version} is not parseable; running anyway"
194        );
195        return Ok(());
196    };
197    if bundle.0 != current.0 || bundle.1 != current.1 {
198        return Err(HarnpackError::new(
199            "harnpack.version_mismatch",
200            format!(
201                "harnpack was built for harn {bundle_version}; \
202                 this runtime is {current_version} (major/minor mismatch refused)"
203            ),
204        ));
205    }
206    let _ = writeln!(
207        stderr,
208        "warning: harnpack was built for harn {bundle_version}; \
209         this runtime is {current_version} (patch mismatch)"
210    );
211    Ok(())
212}
213
/// Extract the leading `major.minor.patch` numbers from a version
/// string. Everything from the first `-` (pre-release) or `+` (build
/// metadata) onward is discarded before parsing, and any dotted fields
/// after the third are ignored. Returns `None` when fewer than three
/// fields are present or one of the first three is not a valid `u32`;
/// callers fall back to a permissive warning so unusual version pins
/// don't strand a working bundle.
fn parse_semver_triplet(input: &str) -> Option<(u32, u32, u32)> {
    let cut = input
        .find(|ch: char| ch == '-' || ch == '+')
        .unwrap_or(input.len());
    let mut fields = input[..cut]
        .split('.')
        .map(|field| field.parse::<u32>().ok());
    // Outer `?` handles a missing field, inner `?` a field that is not a number.
    let major = fields.next()??;
    let minor = fields.next()??;
    let patch = fields.next()??;
    Some((major, minor, patch))
}
228
229/// Returns true when `cache_dir` already holds a previously-replayed
230/// archive whose `harnpack.json` matches `manifest`. Content addressing
231/// (`bundle_hash` in the directory name) makes a single positive match
232/// sufficient; we still cross-check the manifest payload to defend
233/// against partial writes from a prior crash.
234fn manifest_already_replayed(
235    cache_dir: &Path,
236    manifest: &WorkflowBundle,
237) -> Result<bool, HarnpackError> {
238    let manifest_path = cache_dir.join("harnpack.json");
239    let Ok(bytes) = fs::read(&manifest_path) else {
240        return Ok(false);
241    };
242    let cached: WorkflowBundle = match serde_json::from_slice(&bytes) {
243        Ok(value) => value,
244        Err(_) => return Ok(false),
245    };
246    Ok(&cached == manifest)
247}
248
249/// Unpack the bundle into a fresh staging directory and then rename
250/// into the content-addressed cache slot atomically. The intermediate
251/// directory keeps a crash mid-extract from leaving a half-populated
252/// `<bundle_hash>/` that future runs would mistake for a cache hit.
253fn replay_archive(
254    cache_dir: &Path,
255    manifest: &WorkflowBundle,
256    contents: &[HarnpackEntry],
257) -> Result<(), HarnpackError> {
258    let parent = cache_dir.parent().ok_or_else(|| {
259        HarnpackError::new(
260            "harnpack.replay_failed",
261            format!("pack cache path has no parent: {}", cache_dir.display()),
262        )
263    })?;
264    fs::create_dir_all(parent).map_err(|err| io_err("harnpack.replay_failed", err, parent))?;
265    let staging = tempfile::Builder::new()
266        .prefix(".staging-")
267        .tempdir_in(parent)
268        .map_err(|err| io_err("harnpack.replay_failed", err, parent))?;
269    let staging_path = staging.path().to_path_buf();
270
271    for entry in contents {
272        let dest = join_safe(&staging_path, &entry.path)?;
273        if let Some(parent) = dest.parent() {
274            fs::create_dir_all(parent)
275                .map_err(|err| io_err("harnpack.replay_failed", err, parent))?;
276        }
277        fs::write(&dest, &entry.bytes)
278            .map_err(|err| io_err("harnpack.replay_failed", err, &dest))?;
279    }
280
281    let manifest_bytes = serde_json::to_vec(manifest).map_err(|err| {
282        HarnpackError::new(
283            "harnpack.replay_failed",
284            format!("failed to encode manifest for cache: {err}"),
285        )
286    })?;
287    let manifest_path = staging_path.join("harnpack.json");
288    fs::write(&manifest_path, &manifest_bytes)
289        .map_err(|err| io_err("harnpack.replay_failed", err, &manifest_path))?;
290
291    // `rename` is atomic on the same filesystem. Two concurrent runs
292    // unpacking the same bundle hash will both attempt the rename;
293    // whichever loses sees the destination already present and treats
294    // it as authoritative (content addressing guarantees equivalence).
295    // `TempDir::into_path()` defuses the auto-cleanup so the rename
296    // owns the directory.
297    let staged = staging.keep();
298    match fs::rename(&staged, cache_dir) {
299        Ok(()) => Ok(()),
300        Err(err) if cache_dir.join("harnpack.json").exists() => {
301            let _ = fs::remove_dir_all(&staged);
302            // The other writer's tree is now in place — pretend we won.
303            let _ = err;
304            Ok(())
305        }
306        Err(err) => {
307            let _ = fs::remove_dir_all(&staged);
308            Err(io_err("harnpack.replay_failed", err, cache_dir))
309        }
310    }
311}
312
313fn io_err(code: &'static str, err: io::Error, path: &Path) -> HarnpackError {
314    HarnpackError::new(code, format!("{}: {err}", path.display()))
315}
316
317/// Join an archive-relative path onto `base` while refusing anything
318/// that would escape via `..` or absolute components. `read_harnpack`
319/// already rejects unsafe entries at archive parse time; this is
320/// belt-and-braces defense for paths we synthesize on the host side.
321fn join_safe(base: &Path, rel: &Path) -> Result<PathBuf, HarnpackError> {
322    let mut out = base.to_path_buf();
323    for component in rel.components() {
324        match component {
325            Component::Normal(part) => out.push(part),
326            Component::CurDir => {}
327            Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
328                return Err(HarnpackError::new(
329                    "harnpack.unsafe_path",
330                    format!("refusing to unpack unsafe path: {}", rel.display()),
331                ));
332            }
333        }
334    }
335    Ok(out)
336}
337
// Unit tests for the pure helpers in this module (version parsing and
// gating, hash sanitizing, safe path joining). Nothing here touches the
// pack cache or reads a real bundle.
#[cfg(test)]
mod tests {
    use super::*;

    // Release, pre-release, and build-metadata forms all reduce to the
    // numeric triplet; anything without three numeric fields is `None`.
    #[test]
    fn semver_triplet_parses_release_versions() {
        assert_eq!(parse_semver_triplet("1.2.3"), Some((1, 2, 3)));
        assert_eq!(parse_semver_triplet("0.10.42"), Some((0, 10, 42)));
        assert_eq!(parse_semver_triplet("1.2.3-rc.1"), Some((1, 2, 3)));
        assert_eq!(parse_semver_triplet("1.2.3+build.4"), Some((1, 2, 3)));
        assert_eq!(parse_semver_triplet("garbage"), None);
        assert_eq!(parse_semver_triplet("1.2"), None);
    }

    // Only `:` is rewritten; input without one is returned unchanged.
    #[test]
    fn sanitize_bundle_hash_replaces_colon() {
        assert_eq!(sanitize_bundle_hash("blake3:abc"), "blake3_abc");
        assert_eq!(sanitize_bundle_hash("nohash"), "nohash");
    }

    // Same major.minor as this crate with a different patch: the check
    // passes and the warning text lands in the supplied writer.
    #[test]
    fn check_harn_version_compat_warns_on_patch_mismatch() {
        let current = env!("CARGO_PKG_VERSION");
        let (major, minor, patch) = parse_semver_triplet(current).expect("current parses");
        let other_patch = format!("{major}.{minor}.{}", patch.wrapping_add(1));
        let mut stderr = String::new();
        check_harn_version_compat(&other_patch, &mut stderr).expect("patch mismatch warns");
        assert!(stderr.contains("patch mismatch"), "stderr was {stderr}");
    }

    // A different minor version is refused with the dedicated error code.
    #[test]
    fn check_harn_version_compat_refuses_on_minor_mismatch() {
        let current = env!("CARGO_PKG_VERSION");
        let (major, minor, _patch) = parse_semver_triplet(current).expect("current parses");
        let other_minor = format!("{major}.{}.0", minor.wrapping_add(1));
        let mut stderr = String::new();
        let err = check_harn_version_compat(&other_minor, &mut stderr)
            .expect_err("minor mismatch must refuse");
        assert_eq!(err.code, "harnpack.version_mismatch");
    }

    // An unparseable bundle version only produces a warning.
    #[test]
    fn check_harn_version_compat_is_lenient_with_unparseable_bundle_version() {
        let mut stderr = String::new();
        check_harn_version_compat("not-a-version", &mut stderr).expect("permissive on parse fail");
        assert!(stderr.contains("not parseable"));
    }

    // `..` and absolute paths are rejected; a plain relative path is
    // appended component by component.
    #[test]
    fn join_safe_refuses_traversal() {
        let base = PathBuf::from("/tmp/cache");
        assert!(join_safe(&base, Path::new("../escape")).is_err());
        assert!(join_safe(&base, Path::new("/abs/path")).is_err());
        assert_eq!(
            join_safe(&base, Path::new("sources/hello.harn")).unwrap(),
            base.join("sources").join("hello.harn"),
        );
    }
}