Skip to main content

vanta_install/
lib.rs

1//! `vanta-install` — the install engine.
2//!
3//! Drives the lifecycle stages `[4 Fetch]`..`[8 Commit]` (`docs/08-installation.md`)
4//! for a resolved artifact: download (mirror-aware, resumable), verify the
5//! checksum (fail-closed), materialize (extract) into a staging tree, publish it
6//! atomically into the content-addressed store, and record a new generation.
7//!
8//! The entry point takes a resolved [`Artifact`] (produced by `vanta-resolve`).
9//! Supported archive formats: `tar.gz`/`tgz`, `zip`, and `raw`.
10#![forbid(unsafe_code)]
11
12use std::fs;
13use std::io::Read;
14use std::path::{Path, PathBuf};
15use vanta_core::{Area, Artifact, BuildRecipe, Platform, StoreKey, VtaError, VtaResult};
16use vanta_net::Downloader;
17use vanta_security::Policy;
18use vanta_state::{GenerationRecord, State, StoreEntryMeta};
19use vanta_store::Store;
20
/// Progress sink for an [`Engine::install_artifact_reported`] run.
///
/// A caller (typically the CLI) implements this to draw download bars and
/// phase spinners, which keeps this crate free of any UI dependency. Every
/// method has a do-nothing default, so an implementor overrides only what it
/// cares about; the unit type `()` is the fully silent reporter.
pub trait Reporter {
    /// Called once just before fetching starts. `total` is the artifact's
    /// declared size in bytes when known (the length of the download bar).
    fn fetch_start(&self, total: Option<u64>) {
        _ = total;
    }
    /// Called as the download advances: `n` additional bytes have arrived.
    fn fetch_inc(&self, n: u64) {
        _ = n;
    }
    /// Called when a post-fetch phase begins (e.g. `"verifying"`,
    /// `"extracting"`).
    fn phase(&self, name: &str) {
        _ = name;
    }
}
40
/// Silent reporter: the default when no progress UI is wired in. It overrides
/// nothing, so every callback falls through to the trait's no-op default.
impl Reporter for () {}
43
/// Default upper bound, in bytes, on how much data a single archive may
/// decompress to (audit M8). Extraction aborts once an archive (for example a
/// gzip bomb) would expand beyond it, instead of filling the disk. Callers can
/// change the bound with [`Engine::with_max_decompressed`].
pub const DEFAULT_MAX_DECOMPRESSED: u64 = 2 << 30; // 2 GiB
48
/// The install engine, bound to a `$VANTA_HOME`.
pub struct Engine {
    /// Content-addressed store opened over `home`.
    store: Store,
    /// Persistent state (store-entry metadata and generations), opened from
    /// `state.db` under `home`.
    state: State,
    /// Fetches artifacts from their primary URL or mirrors.
    downloader: Downloader,
    /// The `$VANTA_HOME` root; executables are linked into its `bin` directory.
    home: PathBuf,
    /// Verification policy (audit H2). When `require_signature` is set, a missing
    /// or untrusted signature is a hard error (fail-closed).
    policy: Policy,
    /// Hard ceiling on decompressed archive bytes (audit M8).
    max_decompressed: u64,
}
61
62impl Engine {
63    /// Open the engine over `home` (`$VANTA_HOME`) with the default (permissive)
64    /// policy — checksum-gated, signatures verified when present. Use
65    /// [`Engine::open_with_policy`] to require signatures.
66    pub fn open(home: impl AsRef<Path>) -> VtaResult<Engine> {
67        Self::open_with_policy(home, Policy::default())
68    }
69
70    /// Open the engine with an explicit verification [`Policy`] (audit H2).
71    pub fn open_with_policy(home: impl AsRef<Path>, policy: Policy) -> VtaResult<Engine> {
72        let home = home.as_ref().to_path_buf();
73        let store = Store::open(&home)?;
74        let state = State::open(&home.join("state.db"))?;
75        let downloader = Downloader::new()?;
76        Ok(Engine {
77            store,
78            state,
79            downloader,
80            home,
81            policy,
82            max_decompressed: DEFAULT_MAX_DECOMPRESSED,
83        })
84    }
85
    /// Override the decompressed-size ceiling (audit M8).
    ///
    /// Builder-style: consumes the engine and returns it with
    /// `max_decompressed` set to `max` (in bytes).
    pub fn with_max_decompressed(mut self, max: u64) -> Self {
        self.max_decompressed = max;
        self
    }
91
    /// Borrow the underlying store (for `gc`, `which`, etc.).
    pub fn store(&self) -> &Store {
        &self.store
    }
    /// Borrow the underlying state database handle.
    pub fn state(&self) -> &State {
        &self.state
    }
99
100    /// Install one resolved artifact for the current platform, returning its
101    /// store key. Fetch → verify → materialize → publish → commit a generation.
102    /// A store hit short-circuits fetch/verify/materialize.
103    pub fn install_artifact(
104        &self,
105        tool: &str,
106        version: &str,
107        artifact: &Artifact,
108    ) -> VtaResult<StoreKey> {
109        self.install_artifact_reported(tool, version, artifact, &())
110    }
111
112    /// Like [`Engine::install_artifact`], but drives `reporter` with download
113    /// byte counts and phase transitions so a caller can render progress.
114    pub fn install_artifact_reported(
115        &self,
116        tool: &str,
117        version: &str,
118        artifact: &Artifact,
119        reporter: &dyn Reporter,
120    ) -> VtaResult<StoreKey> {
121        // Policy precheck (audit H2): when a signature is required, an artifact
122        // lacking a signature OR a *trusted* signing key (the resolver drops
123        // untrusted keys, audit C1) is refused — fail-closed, before any I/O.
124        let has_trusted_sig = artifact.signature.is_some() && artifact.signature_key.is_some();
125        if self.policy.require_signature && !has_trusted_sig {
126            return Err(VtaError::new(
127                Area::Vrf,
128                3,
129                format!(
130                    "signature required by policy but `{tool} {version}` is unsigned \
131                     or its signing key is not trusted"
132                ),
133            ));
134        }
135
136        // [3 Plan] — if the lock already named a key and it is present, reuse it
137        // ONLY if it still verifies (audit H4): a store hit must not be trusted
138        // blindly, since the entry could have been poisoned (audit H3) or the
139        // lockfile's `store_key` is attacker-influenceable. On mismatch, drop the
140        // bad entry and fall through to a fresh fetch + verify.
141        if let Some(key) = &artifact.store_key {
142            if self.store.has(key) {
143                if self.store.verify_entry(key)? {
144                    self.link_bins(key, &artifact.bin)?;
145                    self.record(tool, version, key, &artifact.checksum.value)?;
146                    return Ok(key.clone());
147                }
148                self.store.remove_entry(key)?;
149            }
150        }
151
152        // [4 Fetch] — cap downloaded bytes at the declared size when known (M8).
153        let dl = self
154            .store
155            .downloads_dir()
156            .join(format!("incoming-{tool}-{}", std::process::id()));
157        let mut urls = vec![artifact.url.clone()];
158        urls.extend(artifact.mirrors.clone());
159        reporter.fetch_start(artifact.size);
160        self.downloader.download_any_with_progress(
161            &urls,
162            &dl,
163            artifact.size,
164            Some(&|n| reporter.fetch_inc(n)),
165        )?;
166
167        // [5 Verify] — fail closed (centralized in vanta-security).
168        reporter.phase("verifying");
169        if let Err(e) =
170            vanta_security::verify_file(&dl, &artifact.checksum.algo, &artifact.checksum.value)
171        {
172            let _ = fs::remove_file(&dl);
173            return Err(e);
174        }
175        // Signature verification when the registry pinned a signature + trusted
176        // key. The key's trust is established upstream (audit C1, in the resolver);
177        // by this point a present `signature_key` is one we trust.
178        if let (Some(sig), Some(key_text)) = (&artifact.signature, &artifact.signature_key) {
179            let key = vanta_security::parse_minisign_pubkey(key_text)?;
180            let bytes = fs::read(&dl).map_err(|e| io(&dl, e))?;
181            if let Err(e) = vanta_security::minisign_verify(&bytes, sig, &key) {
182                let _ = fs::remove_file(&dl);
183                return Err(e);
184            }
185        }
186
187        // [6 Materialize]
188        let name = artifact
189            .bin
190            .first()
191            .map(|b| basename(b))
192            .unwrap_or_else(|| tool.to_string());
193        // The tree that will be published into the store.
194        let staging = self.store.new_staging()?;
195        if let Some(recipe) = &artifact.build {
196            // Source build: extract the (verified) source into a scratch tree,
197            // compile it with the signed recipe, and publish the resulting
198            // install prefix (`staging`) rather than the source.
199            reporter.phase("extracting source");
200            let src = self.store.new_staging()?;
201            extract(
202                &artifact.archive,
203                &dl,
204                &src,
205                &name,
206                artifact.strip,
207                self.max_decompressed,
208            )?;
209            let _ = fs::remove_file(&dl);
210            run_build(recipe, &src, &staging, tool, version, reporter)?;
211            let _ = fs::remove_dir_all(&src);
212        } else {
213            reporter.phase("extracting");
214            extract(
215                &artifact.archive,
216                &dl,
217                &staging,
218                &name,
219                artifact.strip,
220                self.max_decompressed,
221            )?;
222            let _ = fs::remove_file(&dl);
223        }
224
225        // [6 Materialize, cont.] atomic publish into the store.
226        let key = self.store.publish_tree(&staging)?;
227
228        // [7 Link] expose the tool's executables on PATH via ~/.vanta/bin.
229        self.link_bins(&key, &artifact.bin)?;
230
231        // [8 Commit]
232        self.record(tool, version, &key, &artifact.checksum.value)?;
233        Ok(key)
234    }
235
236    /// Link a store entry's declared executables into `~/.vanta/bin` (placed on
237    /// PATH by the shell hook). Per-directory environment views are composed by
238    /// `vanta-env` (`docs/10-environments.md`).
239    fn link_bins(&self, key: &StoreKey, bins: &[String]) -> VtaResult<()> {
240        let bin_dir = self.home.join("bin");
241        fs::create_dir_all(&bin_dir).map_err(|e| io(&bin_dir, e))?;
242        let entry = self.store.entry_path(key);
243        for bin in bins {
244            let src = entry.join(bin);
245            if src.exists() {
246                let dst = bin_dir.join(basename(bin));
247                vanta_store::link_best(&src, &dst)?;
248            }
249        }
250        Ok(())
251    }
252
253    fn record(&self, tool: &str, version: &str, key: &StoreKey, sha256: &str) -> VtaResult<()> {
254        let platform = Platform::current().token();
255        self.state.put_store_entry(
256            key.as_str(),
257            &StoreEntryMeta {
258                tool: tool.to_string(),
259                version: version.to_string(),
260                platform,
261                size: 0,
262                sha256: sha256.to_string(),
263            },
264        )?;
265        let parent = self.state.current()?;
266        let id = parent.map(|c| c + 1).unwrap_or(1);
267        self.state.append_generation(&GenerationRecord {
268            id,
269            parent,
270            command: format!("vanta add {tool}@{version}"),
271            reason: "add".to_string(),
272            tools: vec![(tool.to_string(), key.as_str().to_string())],
273        })?;
274        self.state.set_current(id)?;
275        Ok(())
276    }
277
278    /// Store keys referenced by the active generation.
279    fn active_store_keys(&self) -> VtaResult<Vec<StoreKey>> {
280        let mut keys = Vec::new();
281        if let Some(current) = self.state.current()? {
282            if let Some(gen) = self.state.get_generation(current)? {
283                for (_, k) in gen.tools {
284                    if let Ok(sk) = StoreKey::new(k) {
285                        keys.push(sk);
286                    }
287                }
288            }
289        }
290        Ok(keys)
291    }
292
293    /// Bundle the active generation's store entries into a portable archive
294    /// (`docs/13-offline.md`). Returns the number of entries written.
295    pub fn bundle_current(&self, out: &Path) -> VtaResult<usize> {
296        let keys = self.active_store_keys()?;
297        let file = fs::File::create(out).map_err(|e| io(out, e))?;
298        let enc = flate2::write::GzEncoder::new(file, flate2::Compression::default());
299        let mut builder = tar::Builder::new(enc);
300        let list = keys
301            .iter()
302            .map(|k| k.as_str())
303            .collect::<Vec<_>>()
304            .join("\n");
305        let mut header = tar::Header::new_gnu();
306        header.set_size(list.len() as u64);
307        header.set_mode(0o644);
308        header.set_cksum();
309        builder
310            .append_data(&mut header, "KEYS", list.as_bytes())
311            .map_err(|e| inst(format!("bundle KEYS: {e}")))?;
312        for key in &keys {
313            let dir = self.store.entry_path(key);
314            if dir.is_dir() {
315                builder
316                    .append_dir_all(key.as_str(), &dir)
317                    .map_err(|e| inst(format!("bundle {key}: {e}")))?;
318            }
319        }
320        let enc = builder
321            .into_inner()
322            .map_err(|e| inst(format!("bundle finalize: {e}")))?;
323        enc.finish()
324            .map_err(|e| inst(format!("bundle gzip: {e}")))?;
325        Ok(keys.len())
326    }
327
328    /// Restore store entries from a bundle, verifying each entry's integrity
329    /// against its content-addressed key. Returns the number newly imported.
330    pub fn restore(&self, bundle: &Path) -> VtaResult<usize> {
331        let file = fs::File::open(bundle).map_err(|e| io(bundle, e))?;
332        let gz = flate2::read::GzDecoder::new(file);
333        let mut archive = tar::Archive::new(gz);
334        let staging = self.store.new_staging()?;
335        archive
336            .unpack(&staging)
337            .map_err(|e| inst(format!("restore unpack: {e}")))?;
338        let keys_txt =
339            fs::read_to_string(staging.join("KEYS")).map_err(|e| io(&staging.join("KEYS"), e))?;
340        let mut restored = 0;
341        for line in keys_txt.lines() {
342            let key = line.trim();
343            if key.is_empty() {
344                continue;
345            }
346            // `StoreKey::new` enforces the fixed-width lowercase-hex shape (M7),
347            // so `staging.join(key)` below cannot traverse out of staging.
348            let sk = StoreKey::new(key)?;
349            let dst = self.store.entry_path(&sk);
350            if dst.exists() {
351                // Already present (and immutable + verified at insert); nothing
352                // to import for this key.
353                continue;
354            }
355            let src = staging.join(key);
356            if !src.is_dir() {
357                continue;
358            }
359            // Audit H3: verify the staged subtree hashes to its claimed key
360            // BEFORE publishing it into the canonical store. A bundle whose
361            // contents do not match the `blake3-<hash>` dir name is rejected and
362            // the store is left unchanged (the staging dir is removed below).
363            let actual = vanta_store::hash_tree(&src)?;
364            if actual != sk.as_str() {
365                let _ = fs::remove_dir_all(&staging);
366                return Err(VtaError::new(
367                    Area::Vrf,
368                    1,
369                    format!("bundled entry {key} failed integrity verification (content mismatch)"),
370                ));
371            }
372            // Bundled entries are read-only; add write so the dir can be moved.
373            let _ = vanta_store::ensure_writable(&src);
374            fs::rename(&src, &dst).map_err(|e| io(&dst, e))?;
375            restored += 1;
376        }
377        let _ = fs::remove_dir_all(&staging);
378        Ok(restored)
379    }
380
381    /// Remove a tool: record a new generation without it and unlink its primary
382    /// executable. Returns whether the tool was present.
383    pub fn remove(&self, tool: &str) -> VtaResult<bool> {
384        let current = match self.state.current()? {
385            Some(c) => c,
386            None => return Ok(false),
387        };
388        let gen = match self.state.get_generation(current)? {
389            Some(g) => g,
390            None => return Ok(false),
391        };
392        if !gen.tools.iter().any(|(t, _)| t == tool) {
393            return Ok(false);
394        }
395        let tools: Vec<(String, String)> = gen
396            .tools
397            .iter()
398            .filter(|(t, _)| t != tool)
399            .cloned()
400            .collect();
401        let id = current + 1;
402        self.state.append_generation(&GenerationRecord {
403            id,
404            parent: Some(current),
405            command: format!("vanta remove {tool}"),
406            reason: "remove".to_string(),
407            tools,
408        })?;
409        self.state.set_current(id)?;
410        let _ = fs::remove_file(self.home.join("bin").join(tool));
411        Ok(true)
412    }
413}
414
/// Build an install-area (`Area::Inst`) error with code `1` carrying `msg`.
fn inst(msg: String) -> VtaError {
    VtaError::new(Area::Inst, 1, msg)
}
418
419/// Compile a source tree into an install prefix using a signed [`BuildRecipe`].
420///
421/// Trust model: the recipe comes from the root-signed registry index (not the
422/// fetched source, not the user's environment), so the *commands* are trusted
423/// input; the *source bytes* were checksum-verified before extraction. Each
424/// step runs in `src_dir` with `{prefix}`/`{jobs}` substituted and `PREFIX`
425/// exported; the tool's compiler/`make` are resolved from the host `PATH`
426/// (a documented system dependency, like Homebrew/asdf source builds). Build
427/// output streams to the user's terminal. A non-zero exit aborts the install
428/// fail-closed, so a broken build never publishes a partial tree.
429fn run_build(
430    recipe: &BuildRecipe,
431    src_dir: &Path,
432    prefix: &Path,
433    tool: &str,
434    version: &str,
435    reporter: &dyn Reporter,
436) -> VtaResult<()> {
437    // Autotools builds expect the prefix to exist.
438    fs::create_dir_all(prefix).map_err(|e| io(prefix, e))?;
439    let prefix_str = prefix.to_string_lossy().to_string();
440    let jobs = std::thread::available_parallelism()
441        .map(|n| n.get())
442        .unwrap_or(1)
443        .to_string();
444
445    for (i, step) in recipe.steps.iter().enumerate() {
446        let Some((prog, rest)) = step.split_first() else {
447            continue; // skip an empty step defensively
448        };
449        let args: Vec<String> = rest
450            .iter()
451            .map(|a| a.replace("{prefix}", &prefix_str).replace("{jobs}", &jobs))
452            .collect();
453        let prog = prog
454            .replace("{prefix}", &prefix_str)
455            .replace("{jobs}", &jobs);
456
457        reporter.phase(&format!(
458            "building {tool} {version} [{}/{}]: {prog}",
459            i + 1,
460            recipe.steps.len()
461        ));
462        let status = std::process::Command::new(&prog)
463            .args(&args)
464            .current_dir(src_dir)
465            .env("PREFIX", &prefix_str)
466            .status()
467            .map_err(|e| {
468                VtaError::new(
469                    Area::Inst,
470                    4,
471                    format!("source build of `{tool} {version}`: cannot start `{prog}`: {e}"),
472                )
473            })?;
474        if !status.success() {
475            return Err(VtaError::new(
476                Area::Inst,
477                4,
478                format!(
479                    "source build of `{tool} {version}`: step `{prog}` failed ({status}); \
480                     ensure the required build toolchain (C compiler, make, …) is installed"
481                ),
482            ));
483        }
484    }
485
486    // Sanity: a build that installed nothing into the prefix is a failure, not
487    // a silent empty publish.
488    let populated = fs::read_dir(prefix)
489        .map(|mut d| d.next().is_some())
490        .unwrap_or(false);
491    if !populated {
492        return Err(VtaError::new(
493            Area::Inst,
494            4,
495            format!("source build of `{tool} {version}`: recipe produced an empty install prefix"),
496        ));
497    }
498    Ok(())
499}
500
501/// Materialize an artifact's bytes into `dest` according to its archive kind,
502/// stripping `strip` leading path components (the provider's layout).
503/// `max_decompressed` caps the total decompressed bytes (audit M8).
504pub fn extract(
505    archive: &str,
506    src: &Path,
507    dest: &Path,
508    raw_name: &str,
509    strip: u32,
510    max_decompressed: u64,
511) -> VtaResult<()> {
512    match archive {
513        "tar.gz" | "tgz" => extract_targz(src, dest, strip, max_decompressed),
514        "zip" => extract_zip(src, dest, strip, max_decompressed),
515        "raw" => {
516            fs::create_dir_all(dest).map_err(|e| io(dest, e))?;
517            let out = dest.join(raw_name);
518            fs::copy(src, &out).map_err(|e| io(&out, e))?;
519            set_executable(&out);
520            Ok(())
521        }
522        other => Err(VtaError::new(
523            Area::Inst,
524            3,
525            format!("unsupported archive kind `{other}` (supported: tar.gz, tgz, zip, raw)"),
526        )),
527    }
528}
529
/// Extract a `.zip` under the same security model as the tar path: path
/// traversal rejected (zip-slip), a shared decompressed-bytes budget across all
/// entries (M8), link targets validated before creation (M5), and
/// setuid/setgid/sticky stripped from materialized modes (M5).
///
/// `src` is the zip file, `dest` the directory to extract into (created if
/// missing), `strip` the number of leading path components to drop from each
/// entry, and `max_decompressed` the total byte budget for file contents.
/// Entries whose path is empty after stripping are skipped.
fn extract_zip(src: &Path, dest: &Path, strip: u32, max_decompressed: u64) -> VtaResult<()> {
    use std::path::PathBuf;
    let file = fs::File::open(src).map_err(|e| io(src, e))?;
    let mut archive = zip::ZipArchive::new(file)
        .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading zip archive: {e}")))?;
    fs::create_dir_all(dest).map_err(|e| io(dest, e))?;
    // Canonical form of the root, compared against each entry's real parent.
    let dest_canon = dest.canonicalize().map_err(|e| io(dest, e))?;
    // M8: one budget across all entries, so many mid-size entries cannot
    // multiply past the ceiling any more than one huge entry can.
    let mut budget = max_decompressed;

    for i in 0..archive.len() {
        let mut entry = archive
            .by_index(i)
            .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading zip entry: {e}")))?;
        // `enclosed_name` refuses absolute paths and any `..` component.
        let Some(path) = entry.enclosed_name() else {
            return Err(traversal());
        };
        let stripped: PathBuf = path.components().skip(strip as usize).collect();
        if stripped.as_os_str().is_empty() {
            continue;
        }
        // Second, lexical check on the stripped path itself.
        if escapes(&stripped) {
            return Err(traversal());
        }
        let out = dest.join(&stripped);

        if entry.is_dir() {
            fs::create_dir_all(&out).map_err(|e| io(&out, e))?;
            continue;
        }

        if let Some(parent) = out.parent() {
            fs::create_dir_all(parent).map_err(|e| io(parent, e))?;
            // M5: realpath of the parent must stay under the staging root
            // (defeats symlinked ancestors created by earlier entries).
            let parent_canon = parent.canonicalize().map_err(|e| io(parent, e))?;
            if !parent_canon.starts_with(&dest_canon) {
                return Err(traversal());
            }
        }

        let mode = entry.unix_mode();
        // M5: symlink entries (mode S_IFLNK) carry the target as file content.
        // Validate the target exactly like the tar path before creating.
        if mode.is_some_and(|m| m & 0o170000 == 0o120000) {
            let mut target = String::new();
            // A link target longer than 4096 bytes is treated as an error.
            LimitReader::new(&mut entry, 4096)
                .read_to_string(&mut target)
                .map_err(|e| VtaError::new(Area::Inst, 1, format!("zip link target: {e}")))?;
            let target_path = Path::new(&target);
            // Symlink targets resolve relative to the link's own directory.
            let base = stripped.parent().unwrap_or_else(|| Path::new(""));
            if link_target_escapes(base, target_path) {
                return Err(VtaError::new(
                    Area::Inst,
                    1,
                    format!(
                        "archive link entry `{}` has an unsafe target `{target}` (rejected)",
                        stripped.display()
                    ),
                ));
            }
            #[cfg(unix)]
            std::os::unix::fs::symlink(target_path, &out).map_err(|e| io(&out, e))?;
            // Non-unix: skip symlinks (tool zips for windows do not rely on them).
            continue;
        }

        let mut writer = fs::File::create(&out).map_err(|e| io(&out, e))?;
        // The copy errors out if this entry alone would exceed what is left of
        // the shared budget; otherwise the budget shrinks by the bytes written.
        let mut limited = LimitReader::new(&mut entry, budget);
        let copied = std::io::copy(&mut limited, &mut writer)
            .map_err(|e| VtaError::new(Area::Inst, 1, format!("unpacking zip entry: {e}")))?;
        budget = budget.saturating_sub(copied);

        // Apply the entry's permission bits sans special bits (M5); default to
        // 0644 when the zip carries no unix modes (created on Windows).
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let safe = mode.map(|m| m & 0o777).unwrap_or(0o644);
            let _ = fs::set_permissions(&out, fs::Permissions::from_mode(safe));
        }
        strip_special_bits(&out);
    }
    Ok(())
}
621
622fn extract_targz(src: &Path, dest: &Path, strip: u32, max_decompressed: u64) -> VtaResult<()> {
623    use std::path::PathBuf;
624    let file = fs::File::open(src).map_err(|e| io(src, e))?;
625    // M8: bound total decompressed bytes so a gzip bomb aborts rather than
626    // filling the disk.
627    let gz = LimitReader::new(flate2::read::GzDecoder::new(file), max_decompressed);
628    let mut archive = tar::Archive::new(gz);
629    // We re-apply a sanitized mode after unpack (M5: strip setuid/setgid), so we
630    // do not need tar to preserve raw permission bits.
631    archive.set_preserve_permissions(true);
632    let dest_canon = dest.canonicalize().map_err(|e| io(dest, e))?;
633    let entries = archive
634        .entries()
635        .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading archive: {e}")))?;
636    for entry in entries {
637        let mut entry = entry
638            .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading archive entry: {e}")))?;
639        let entry_type = entry.header().entry_type();
640        let path = entry
641            .path()
642            .map_err(|e| VtaError::new(Area::Inst, 1, format!("entry path: {e}")))?
643            .into_owned();
644        let stripped: PathBuf = path.components().skip(strip as usize).collect();
645        if stripped.as_os_str().is_empty() {
646            continue;
647        }
648        // Reject anything that could escape the destination (zip-slip / traversal).
649        if escapes(&stripped) {
650            return Err(traversal());
651        }
652        // M5: link entries (symlink/hardlink) get an extra check — their
653        // *target*, resolved against the link's own directory, must not escape
654        // the staging tree. `entry.unpack` would otherwise create a link
655        // pointing outside, which a later entry could write through. Internal
656        // `..` that resolves back inside the tree is allowed (e.g. node's
657        // `bin/corepack -> ../lib/node_modules/...`). A symlink target is
658        // relative to the link's directory; a tar hardlink target is relative
659        // to the archive root.
660        if matches!(entry_type, tar::EntryType::Symlink | tar::EntryType::Link) {
661            let target = entry
662                .link_name()
663                .map_err(|e| VtaError::new(Area::Inst, 1, format!("link target: {e}")))?
664                .map(|c| c.into_owned())
665                .unwrap_or_default();
666            let base = if entry_type == tar::EntryType::Symlink {
667                stripped.parent().unwrap_or_else(|| Path::new(""))
668            } else {
669                Path::new("")
670            };
671            if link_target_escapes(base, &target) {
672                return Err(VtaError::new(
673                    Area::Inst,
674                    1,
675                    format!(
676                        "archive link entry `{}` has an unsafe target `{}` (rejected)",
677                        stripped.display(),
678                        target.display()
679                    ),
680                ));
681            }
682        }
683        let out = dest.join(&stripped);
684        if let Some(parent) = out.parent() {
685            fs::create_dir_all(parent).map_err(|e| io(parent, e))?;
686            // M5: after the parent exists, canonicalize it and confirm the
687            // realpath still lies under the staging root (defeats symlinked
688            // ancestors pointing elsewhere).
689            let parent_canon = parent.canonicalize().map_err(|e| io(parent, e))?;
690            if !parent_canon.starts_with(&dest_canon) {
691                return Err(traversal());
692            }
693        }
694        entry
695            .unpack(&out)
696            .map_err(|e| VtaError::new(Area::Inst, 1, format!("unpacking entry: {e}")))?;
697        // M5: strip setuid/setgid/sticky bits from materialized files.
698        strip_special_bits(&out);
699    }
700    Ok(())
701}
702
/// Decide whether a link's `target` would point outside the extraction root.
///
/// `base_dir` is the directory the link lives in, relative to the root (empty
/// for hardlink targets, which are archive-root-relative). The check is purely
/// lexical and never touches the filesystem. It tracks how many directory
/// levels below the root the path currently is: an absolute target, a
/// root/prefix component, or a `..` that would climb above the root all count
/// as an escape. A `..` that stays inside the tree is fine (e.g. node's
/// `bin/corepack -> ../lib/node_modules/...`).
fn link_target_escapes(base_dir: &Path, target: &Path) -> bool {
    use std::path::Component;
    if target.is_absolute() {
        return true;
    }
    // Depth of the link's own directory below the root.
    let start: i64 = base_dir
        .components()
        .map(|part| match part {
            Component::Normal(_) => 1,
            Component::ParentDir => -1,
            _ => 0,
        })
        .sum();
    // Walk the target; `None` means it escaped part-way through.
    let landed = target.components().try_fold(start, |level, part| match part {
        Component::Normal(_) => Some(level + 1),
        Component::CurDir => Some(level),
        Component::ParentDir => (level >= 1).then(|| level - 1),
        Component::RootDir | Component::Prefix(_) => None,
    });
    match landed {
        Some(level) => level < 0,
        None => true,
    }
}
737
/// Report whether the relative path `p` has any component that could take it
/// out of its base directory: a `..`, a root directory, or a path prefix.
fn escapes(p: &Path) -> bool {
    use std::path::Component;
    for part in p.components() {
        match part {
            Component::ParentDir | Component::RootDir | Component::Prefix(_) => return true,
            Component::CurDir | Component::Normal(_) => {}
        }
    }
    false
}
748
/// Build the shared "path traversal rejected" error (`Area::Inst`, code `1`)
/// returned by the extractors when an entry would land outside the destination.
fn traversal() -> VtaError {
    VtaError::new(
        Area::Inst,
        1,
        "archive entry escapes destination (path traversal rejected)".to_string(),
    )
}
756
/// `Read` adapter that enforces a byte budget on the wrapped reader (audit M8):
/// reads pass through until their running total would exceed the limit, at
/// which point the read fails instead of returning data.
struct LimitReader<R> {
    /// The wrapped reader.
    inner: R,
    /// Bytes that may still be read before the budget is exhausted.
    remaining: u64,
}

impl<R> LimitReader<R> {
    /// Wraps `inner`, allowing at most `limit` bytes to be read through it.
    fn new(inner: R, limit: u64) -> Self {
        Self {
            inner,
            remaining: limit,
        }
    }
}

impl<R: Read> Read for LimitReader<R> {
    /// Forwards to the inner reader, then charges the bytes obtained against
    /// the budget. Inner read errors propagate untouched; a read that
    /// overshoots the budget yields `InvalidData` and leaves the budget as is.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let got = self.inner.read(buf)?;
        // `checked_sub` is `None` exactly when this read overshoots the budget.
        match self.remaining.checked_sub(got as u64) {
            Some(left) => {
                self.remaining = left;
                Ok(got)
            }
            None => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "decompressed size exceeds configured maximum (possible decompression bomb)",
            )),
        }
    }
}
786
/// Strip setuid/setgid/sticky bits from a materialized path (audit M5).
///
/// Best-effort by design: the function returns nothing, and a path that cannot
/// be stat'ed or chmod'ed is silently left as it is. Symlinks are skipped
/// without being followed, since they carry no meaningful permission bits.
/// Only the special bits are removed; the ordinary `rwx` bits are kept.
#[cfg(unix)]
fn strip_special_bits(path: &Path) {
    use std::os::unix::fs::PermissionsExt;

    // setuid | setgid | sticky
    const SPECIAL_BITS: u32 = 0o7000;

    // `symlink_metadata` so a symlink is inspected itself rather than followed.
    let meta = match fs::symlink_metadata(path) {
        Ok(meta) => meta,
        Err(_) => return,
    };
    if meta.file_type().is_symlink() {
        return;
    }

    let mut perms = meta.permissions();
    let mode = perms.mode();
    // `mode()` includes the file-type bits alongside the permission bits, so
    // test the special bits directly: comparing `mode & 0o777` with `mode`
    // never matches and would issue a chmod for every extracted entry.
    if mode & SPECIAL_BITS == 0 {
        return;
    }
    perms.set_mode(mode & 0o777);
    let _ = fs::set_permissions(path, perms);
}

/// Non-Unix targets have no setuid/setgid/sticky bits to strip; this is a no-op.
#[cfg(not(unix))]
fn strip_special_bits(_path: &Path) {}
808
/// Returns the part of `p` after its last `/` or `\` separator, or all of `p`
/// when it contains neither. A trailing separator yields an empty string.
fn basename(p: &str) -> String {
    match p.rfind(['/', '\\']) {
        // Both separators are one byte long, so `cut + 1` is a char boundary.
        Some(cut) => p[cut + 1..].to_string(),
        None => p.to_string(),
    }
}
812
/// Adds the `0o755` permission bits to `path` on top of whatever it already
/// has. Best-effort: if the path cannot be stat'ed or chmod'ed, nothing
/// happens and no error is reported.
#[cfg(unix)]
fn set_executable(path: &Path) {
    use std::os::unix::fs::PermissionsExt;

    let meta = match fs::metadata(path) {
        Ok(meta) => meta,
        Err(_) => return,
    };
    let mut perms = meta.permissions();
    let widened = perms.mode() | 0o755;
    perms.set_mode(widened);
    let _ = fs::set_permissions(path, perms);
}

/// No-op on non-Unix targets.
#[cfg(not(unix))]
fn set_executable(_path: &Path) {}
825
826fn io(path: &Path, e: std::io::Error) -> VtaError {
827    VtaError::new(Area::Inst, 2, format!("{}: {e}", path.display()))
828}
829
830#[cfg(test)]
831mod tests {
832    use super::*;
833
834    fn home(tag: &str) -> PathBuf {
835        let p = std::env::temp_dir().join(format!("vanta-install-{}-{}", tag, std::process::id()));
836        let _ = fs::remove_dir_all(&p);
837        p
838    }
839
840    #[test]
841    fn engine_opens_and_creates_state() {
842        let h = home("open");
843        let e = Engine::open(&h).unwrap();
844        assert_eq!(
845            e.state().schema_version().unwrap(),
846            vanta_state::SCHEMA_VERSION
847        );
848        let _ = fs::remove_dir_all(&h);
849    }
850
851    #[test]
852    fn extracts_targz_then_publishes() {
853        use flate2::write::GzEncoder;
854        use flate2::Compression;
855
856        // Build a small .tar.gz in memory: one file `bin/tool`.
857        let mut builder = tar::Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
858        let mut header = tar::Header::new_gnu();
859        let payload = b"#!/bin/sh\necho hi\n";
860        header.set_size(payload.len() as u64);
861        header.set_mode(0o755);
862        header.set_cksum();
863        builder
864            .append_data(&mut header, "bin/tool", &payload[..])
865            .unwrap();
866        let gz = builder.into_inner().unwrap();
867        let bytes = gz.finish().unwrap();
868
869        let h = home("targz");
870        let store = Store::open(&h).unwrap();
871        let archive_path = store.downloads_dir().join("a.tar.gz");
872        fs::write(&archive_path, &bytes).unwrap();
873
874        let staging = store.new_staging().unwrap();
875        extract(
876            "tar.gz",
877            &archive_path,
878            &staging,
879            "tool",
880            0,
881            DEFAULT_MAX_DECOMPRESSED,
882        )
883        .unwrap();
884        assert!(staging.join("bin/tool").exists());
885
886        let key = store.publish_tree(&staging).unwrap();
887        assert!(store.has(&key));
888        assert!(store.verify_entry(&key).unwrap());
889        let _ = fs::remove_dir_all(&h);
890    }
891
892    #[test]
893    fn source_build_runs_recipe_into_prefix() {
894        let src = home("bld-src");
895        fs::create_dir_all(&src).unwrap();
896        let prefix = home("bld-pfx");
897        // A tiny two-step recipe exercising {prefix} substitution + ordering.
898        let recipe = BuildRecipe {
899            steps: vec![
900                vec!["sh".into(), "-c".into(), "mkdir -p {prefix}/bin".into()],
901                vec![
902                    "sh".into(),
903                    "-c".into(),
904                    "printf '#!/bin/sh\\necho hi\\n' > {prefix}/bin/tool && chmod +x {prefix}/bin/tool"
905                        .into(),
906                ],
907            ],
908        };
909        run_build(&recipe, &src, &prefix, "tool", "1.0", &()).unwrap();
910        let bin = prefix.join("bin/tool");
911        assert!(bin.exists(), "recipe should install bin/tool");
912        let out = std::process::Command::new(&bin).output().unwrap();
913        assert_eq!(String::from_utf8_lossy(&out.stdout).trim(), "hi");
914        let _ = fs::remove_dir_all(&src);
915        let _ = fs::remove_dir_all(&prefix);
916    }
917
918    #[test]
919    fn source_build_failing_step_aborts() {
920        let src = home("bld-fail-src");
921        fs::create_dir_all(&src).unwrap();
922        let prefix = home("bld-fail-pfx");
923        let recipe = BuildRecipe {
924            steps: vec![vec!["sh".into(), "-c".into(), "exit 3".into()]],
925        };
926        let err = run_build(&recipe, &src, &prefix, "tool", "1.0", &()).unwrap_err();
927        assert!(err.to_string().contains("failed"), "{err}");
928        let _ = fs::remove_dir_all(&src);
929        let _ = fs::remove_dir_all(&prefix);
930    }
931
932    #[test]
933    fn source_build_empty_prefix_rejected() {
934        let src = home("bld-empty-src");
935        fs::create_dir_all(&src).unwrap();
936        let prefix = home("bld-empty-pfx");
937        // Recipe "succeeds" but installs nothing → must not publish empty tree.
938        let recipe = BuildRecipe {
939            steps: vec![vec!["sh".into(), "-c".into(), "true".into()]],
940        };
941        let err = run_build(&recipe, &src, &prefix, "tool", "1.0", &()).unwrap_err();
942        assert!(err.to_string().contains("empty install prefix"), "{err}");
943        let _ = fs::remove_dir_all(&src);
944        let _ = fs::remove_dir_all(&prefix);
945    }
946
947    #[test]
948    fn link_target_guard_allows_internal_dotdot_rejects_escape() {
949        use std::path::Path;
950        // node's real symlink: bin/corepack -> ../lib/node_modules/... stays in.
951        assert!(!link_target_escapes(
952            Path::new("bin"),
953            Path::new("../lib/node_modules/corepack/dist/corepack.js")
954        ));
955        // sibling reference stays in.
956        assert!(!link_target_escapes(Path::new("bin"), Path::new("node")));
957        // pops above root → escape.
958        assert!(link_target_escapes(
959            Path::new("bin"),
960            Path::new("../../etc/passwd")
961        ));
962        // absolute → escape.
963        assert!(link_target_escapes(
964            Path::new("bin"),
965            Path::new("/etc/passwd")
966        ));
967        // hardlink base is root: a leading `..` escapes immediately.
968        assert!(link_target_escapes(Path::new(""), Path::new("../x")));
969        assert!(!link_target_escapes(Path::new(""), Path::new("bin/node")));
970    }
971
972    /// Build an in-memory zip: (path, mode, payload) triples.
973    fn make_zip(entries: &[(&str, u32, &[u8])]) -> Vec<u8> {
974        use std::io::Write;
975        let mut w = zip::ZipWriter::new(std::io::Cursor::new(Vec::new()));
976        for (name, mode, payload) in entries {
977            let opts = zip::write::SimpleFileOptions::default().unix_permissions(*mode);
978            w.start_file(*name, opts).unwrap();
979            w.write_all(payload).unwrap();
980        }
981        w.finish().unwrap().into_inner()
982    }
983
984    #[test]
985    fn extracts_zip_with_strip_and_modes() {
986        let h = home("zip");
987        let store = Store::open(&h).unwrap();
988        let bytes = make_zip(&[
989            ("terraform_1.9.0/terraform", 0o755, b"#!/bin/sh\necho tf\n"),
990            ("terraform_1.9.0/README.md", 0o644, b"docs"),
991        ]);
992        let archive_path = store.downloads_dir().join("a.zip");
993        fs::write(&archive_path, &bytes).unwrap();
994
995        let staging = store.new_staging().unwrap();
996        extract(
997            "zip",
998            &archive_path,
999            &staging,
1000            "terraform",
1001            1,
1002            DEFAULT_MAX_DECOMPRESSED,
1003        )
1004        .unwrap();
1005        let bin = staging.join("terraform");
1006        assert!(bin.exists());
1007        assert!(staging.join("README.md").exists());
1008        #[cfg(unix)]
1009        {
1010            use std::os::unix::fs::PermissionsExt;
1011            let mode = fs::metadata(&bin).unwrap().permissions().mode();
1012            assert_eq!(mode & 0o777, 0o755, "exec bit preserved from zip modes");
1013        }
1014        let _ = fs::remove_dir_all(&h);
1015    }
1016
1017    #[test]
1018    fn zip_slip_rejected() {
1019        let h = home("zipslip");
1020        let store = Store::open(&h).unwrap();
1021        let bytes = make_zip(&[("../evil", 0o644, b"pwn")]);
1022        let archive_path = store.downloads_dir().join("evil.zip");
1023        fs::write(&archive_path, &bytes).unwrap();
1024
1025        let staging = store.new_staging().unwrap();
1026        let err = extract(
1027            "zip",
1028            &archive_path,
1029            &staging,
1030            "evil",
1031            0,
1032            DEFAULT_MAX_DECOMPRESSED,
1033        )
1034        .unwrap_err();
1035        assert!(err.to_string().contains("traversal"), "{err}");
1036        let _ = fs::remove_dir_all(&h);
1037    }
1038
1039    #[test]
1040    fn zip_decompression_budget_enforced() {
1041        let h = home("zipbomb");
1042        let store = Store::open(&h).unwrap();
1043        let big = vec![0u8; 64 * 1024];
1044        let bytes = make_zip(&[("big.bin", 0o644, &big[..])]);
1045        let archive_path = store.downloads_dir().join("big.zip");
1046        fs::write(&archive_path, &bytes).unwrap();
1047
1048        let staging = store.new_staging().unwrap();
1049        // Budget below the decompressed size must abort, not fill the disk.
1050        let err = extract("zip", &archive_path, &staging, "big", 0, 1024).unwrap_err();
1051        assert!(err.to_string().contains("decompress"), "{err}");
1052        let _ = fs::remove_dir_all(&h);
1053    }
1054
1055    #[test]
1056    fn rejects_unsupported_archive() {
1057        let err = extract(
1058            "tar.xz",
1059            Path::new("/x"),
1060            Path::new("/y"),
1061            "t",
1062            0,
1063            DEFAULT_MAX_DECOMPRESSED,
1064        )
1065        .unwrap_err();
1066        assert_eq!(err.area, Area::Inst);
1067    }
1068
1069    // M5: an archive containing a symlink whose target escapes the tree (here an
1070    // absolute path), followed by a write through that link, must be rejected.
1071    #[test]
1072    fn rejects_symlink_escape_archive() {
1073        use flate2::write::GzEncoder;
1074        use flate2::Compression;
1075
1076        let mut builder = tar::Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
1077        // A symlink `evil` -> `/tmp/escape-target` (absolute).
1078        let mut link = tar::Header::new_gnu();
1079        link.set_entry_type(tar::EntryType::Symlink);
1080        link.set_size(0);
1081        link.set_mode(0o777);
1082        builder
1083            .append_link(&mut link, "evil", "/tmp/vanta-escape-target")
1084            .unwrap();
1085        // A regular write through the link path.
1086        let payload = b"pwned";
1087        let mut f = tar::Header::new_gnu();
1088        f.set_size(payload.len() as u64);
1089        f.set_mode(0o644);
1090        f.set_cksum();
1091        builder.append_data(&mut f, "evil", &payload[..]).unwrap();
1092        let bytes = builder.into_inner().unwrap().finish().unwrap();
1093
1094        let h = home("symlink");
1095        let store = Store::open(&h).unwrap();
1096        let archive_path = store.downloads_dir().join("evil.tar.gz");
1097        fs::write(&archive_path, &bytes).unwrap();
1098        let staging = store.new_staging().unwrap();
1099        let err = extract(
1100            "tar.gz",
1101            &archive_path,
1102            &staging,
1103            "tool",
1104            0,
1105            DEFAULT_MAX_DECOMPRESSED,
1106        )
1107        .unwrap_err();
1108        assert_eq!(err.area, Area::Inst);
1109        assert!(!Path::new("/tmp/vanta-escape-target").exists());
1110        let _ = fs::remove_dir_all(&h);
1111    }
1112
1113    // M8: a highly compressible archive that decompresses past the cap aborts.
1114    #[test]
1115    fn rejects_decompression_bomb() {
1116        use flate2::write::GzEncoder;
1117        use flate2::Compression;
1118
1119        let mut builder = tar::Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
1120        let big = vec![0u8; 1_000_000]; // 1 MB of zeros, compresses tiny
1121        let mut header = tar::Header::new_gnu();
1122        header.set_size(big.len() as u64);
1123        header.set_mode(0o644);
1124        header.set_cksum();
1125        builder.append_data(&mut header, "big", &big[..]).unwrap();
1126        let bytes = builder.into_inner().unwrap().finish().unwrap();
1127
1128        let h = home("bomb");
1129        let store = Store::open(&h).unwrap();
1130        let archive_path = store.downloads_dir().join("bomb.tar.gz");
1131        fs::write(&archive_path, &bytes).unwrap();
1132        let staging = store.new_staging().unwrap();
1133        // Cap well below the 1 MB payload → extraction must fail.
1134        let err = extract("tar.gz", &archive_path, &staging, "tool", 0, 4096).unwrap_err();
1135        assert_eq!(err.area, Area::Inst);
1136        let _ = fs::remove_dir_all(&h);
1137    }
1138}