Skip to main content

vanta_install/
lib.rs

//! `vanta-install` — the install engine.
//!
//! Drives the lifecycle stages `[4 Fetch]`..`[8 Commit]` (`docs/08-installation.md`)
//! for a resolved artifact: download (mirror-aware, resumable), verify the
//! checksum (fail-closed), materialize (extract) into a staging tree, publish it
//! atomically into the content-addressed store, and record a new generation.
//!
//! The entry point takes a resolved [`Artifact`] (produced by `vanta-resolve`).
//! Supported archive formats: `tar.gz`/`tgz`, `tar.xz`/`txz`, `zip`, and `raw`.
10#![forbid(unsafe_code)]
11
12use std::fs;
13use std::io::Read;
14use std::path::{Path, PathBuf};
15use vanta_core::{Area, Artifact, BuildRecipe, Platform, StoreKey, VtaError, VtaResult};
16use vanta_net::Downloader;
17use vanta_security::Policy;
18use vanta_state::{GenerationRecord, State, StoreEntryMeta};
19use vanta_store::Store;
20
/// Progress sink for an [`Engine::install_artifact_reported`] run.
///
/// A caller (the CLI) implements this to render download bars and phase
/// spinners, which keeps this crate free of any UI dependency. Every method
/// has a no-op default, so an implementer overrides only what it displays.
pub trait Reporter {
    /// Called right before the fetch stage starts. `total` is the artifact's
    /// declared size in bytes when known (the download bar's length).
    fn fetch_start(&self, total: Option<u64>) {
        _ = total;
    }
    /// Called each time `n` additional bytes have been downloaded.
    fn fetch_inc(&self, n: u64) {
        _ = n;
    }
    /// Called when a post-fetch phase begins (e.g. `"verifying"`,
    /// `"extracting"`).
    fn phase(&self, name: &str) {
        _ = name;
    }
}

/// The unit type is the fully silent reporter used when no progress UI is
/// wired in: it inherits every no-op default.
impl Reporter for () {}
43
/// Default cap, in bytes, on the total decompressed size of one archive
/// (audit M8): 2 GiB. An archive that would expand beyond this (e.g. a gzip
/// bomb) aborts extraction instead of filling the disk. Callers can pick a
/// different cap with [`Engine::with_max_decompressed`].
pub const DEFAULT_MAX_DECOMPRESSED: u64 = 2 << 30; // 2 * 2^30 bytes = 2 GiB
48
49/// The install engine, bound to a `$VANTA_HOME`.
50pub struct Engine {
51    store: Store,
52    state: State,
53    downloader: Downloader,
54    home: PathBuf,
55    /// Verification policy (audit H2). When `require_signature` is set, a missing
56    /// or untrusted signature is a hard error (fail-closed).
57    policy: Policy,
58    /// Hard ceiling on decompressed archive bytes (audit M8).
59    max_decompressed: u64,
60}
61
62impl Engine {
63    /// Open the engine over `home` (`$VANTA_HOME`) with the default (permissive)
64    /// policy — checksum-gated, signatures verified when present. Use
65    /// [`Engine::open_with_policy`] to require signatures.
66    pub fn open(home: impl AsRef<Path>) -> VtaResult<Engine> {
67        Self::open_with_policy(home, Policy::default())
68    }
69
70    /// Open the engine with an explicit verification [`Policy`] (audit H2).
71    pub fn open_with_policy(home: impl AsRef<Path>, policy: Policy) -> VtaResult<Engine> {
72        let home = home.as_ref().to_path_buf();
73        let store = Store::open(&home)?;
74        let state = State::open(&home.join("state.db"))?;
75        let downloader = Downloader::new()?;
76        Ok(Engine {
77            store,
78            state,
79            downloader,
80            home,
81            policy,
82            max_decompressed: DEFAULT_MAX_DECOMPRESSED,
83        })
84    }
85
86    /// Override the decompressed-size ceiling (audit M8).
87    pub fn with_max_decompressed(mut self, max: u64) -> Self {
88        self.max_decompressed = max;
89        self
90    }
91
92    /// Borrow the underlying store / state (for `gc`, `which`, etc.).
93    pub fn store(&self) -> &Store {
94        &self.store
95    }
96    pub fn state(&self) -> &State {
97        &self.state
98    }
99
100    /// Install one resolved artifact for the current platform, returning its
101    /// store key. Fetch → verify → materialize → publish → commit a generation.
102    /// A store hit short-circuits fetch/verify/materialize.
103    pub fn install_artifact(
104        &self,
105        tool: &str,
106        version: &str,
107        artifact: &Artifact,
108    ) -> VtaResult<StoreKey> {
109        self.install_artifact_reported(tool, version, artifact, &())
110    }
111
112    /// Like [`Engine::install_artifact`], but drives `reporter` with download
113    /// byte counts and phase transitions so a caller can render progress.
114    pub fn install_artifact_reported(
115        &self,
116        tool: &str,
117        version: &str,
118        artifact: &Artifact,
119        reporter: &dyn Reporter,
120    ) -> VtaResult<StoreKey> {
121        // Policy precheck (audit H2): when a signature is required, an artifact
122        // lacking a signature OR a *trusted* signing key (the resolver drops
123        // untrusted keys, audit C1) is refused — fail-closed, before any I/O.
124        let has_trusted_sig = artifact.signature.is_some() && artifact.signature_key.is_some();
125        if self.policy.require_signature && !has_trusted_sig {
126            return Err(VtaError::new(
127                Area::Vrf,
128                3,
129                format!(
130                    "signature required by policy but `{tool} {version}` is unsigned \
131                     or its signing key is not trusted"
132                ),
133            ));
134        }
135
136        // [3 Plan] — if the lock already named a key and it is present, reuse it
137        // ONLY if it still verifies (audit H4): a store hit must not be trusted
138        // blindly, since the entry could have been poisoned (audit H3) or the
139        // lockfile's `store_key` is attacker-influenceable. On mismatch, drop the
140        // bad entry and fall through to a fresh fetch + verify.
141        if let Some(key) = &artifact.store_key {
142            if self.store.has(key) {
143                if self.store.verify_entry(key)? {
144                    self.link_bins(key, &artifact.bin)?;
145                    self.record(tool, version, key, &artifact.checksum.value)?;
146                    return Ok(key.clone());
147                }
148                self.store.remove_entry(key)?;
149            }
150        }
151
152        // [4 Fetch] — cap downloaded bytes at the declared size when known (M8).
153        let dl = self
154            .store
155            .downloads_dir()
156            .join(format!("incoming-{tool}-{}", std::process::id()));
157        let mut urls = vec![artifact.url.clone()];
158        urls.extend(artifact.mirrors.clone());
159        reporter.fetch_start(artifact.size);
160        self.downloader.download_any_with_progress(
161            &urls,
162            &dl,
163            artifact.size,
164            Some(&|n| reporter.fetch_inc(n)),
165        )?;
166
167        // [5 Verify] — fail closed (centralized in vanta-security).
168        reporter.phase("verifying");
169        if let Err(e) =
170            vanta_security::verify_file(&dl, &artifact.checksum.algo, &artifact.checksum.value)
171        {
172            let _ = fs::remove_file(&dl);
173            return Err(e);
174        }
175        // Signature verification when the registry pinned a signature + trusted
176        // key. The key's trust is established upstream (audit C1, in the resolver);
177        // by this point a present `signature_key` is one we trust.
178        if let (Some(sig), Some(key_text)) = (&artifact.signature, &artifact.signature_key) {
179            let key = vanta_security::parse_minisign_pubkey(key_text)?;
180            let bytes = fs::read(&dl).map_err(|e| io(&dl, e))?;
181            if let Err(e) = vanta_security::minisign_verify(&bytes, sig, &key) {
182                let _ = fs::remove_file(&dl);
183                return Err(e);
184            }
185        }
186
187        // [6 Materialize]
188        let name = artifact
189            .bin
190            .first()
191            .map(|b| basename(b))
192            .unwrap_or_else(|| tool.to_string());
193        // The tree that will be published into the store.
194        let staging = self.store.new_staging()?;
195        if let Some(recipe) = &artifact.build {
196            // Source build: extract the (verified) source into a scratch tree,
197            // compile it with the signed recipe, and publish the resulting
198            // install prefix (`staging`) rather than the source.
199            reporter.phase("extracting source");
200            let src = self.store.new_staging()?;
201            extract(
202                &artifact.archive,
203                &dl,
204                &src,
205                &name,
206                artifact.strip,
207                self.max_decompressed,
208            )?;
209            let _ = fs::remove_file(&dl);
210            run_build(recipe, &src, &staging, tool, version, reporter)?;
211            let _ = fs::remove_dir_all(&src);
212        } else {
213            reporter.phase("extracting");
214            extract(
215                &artifact.archive,
216                &dl,
217                &staging,
218                &name,
219                artifact.strip,
220                self.max_decompressed,
221            )?;
222            let _ = fs::remove_file(&dl);
223        }
224
225        // [6 Materialize, cont.] atomic publish into the store.
226        let key = self.store.publish_tree(&staging)?;
227
228        // [7 Link] expose the tool's executables on PATH via ~/.vanta/bin.
229        self.link_bins(&key, &artifact.bin)?;
230
231        // [8 Commit]
232        self.record(tool, version, &key, &artifact.checksum.value)?;
233        Ok(key)
234    }
235
236    /// Link a store entry's declared executables into `~/.vanta/bin` (placed on
237    /// PATH by the shell hook). Per-directory environment views are composed by
238    /// `vanta-env` (`docs/10-environments.md`).
239    fn link_bins(&self, key: &StoreKey, bins: &[String]) -> VtaResult<()> {
240        let bin_dir = self.home.join("bin");
241        fs::create_dir_all(&bin_dir).map_err(|e| io(&bin_dir, e))?;
242        let entry = self.store.entry_path(key);
243        for bin in bins {
244            let src = entry.join(bin);
245            if src.exists() {
246                let dst = bin_dir.join(basename(bin));
247                vanta_store::link_best(&src, &dst)?;
248            }
249        }
250        Ok(())
251    }
252
253    fn record(&self, tool: &str, version: &str, key: &StoreKey, sha256: &str) -> VtaResult<()> {
254        let platform = Platform::current().token();
255        self.state.put_store_entry(
256            key.as_str(),
257            &StoreEntryMeta {
258                tool: tool.to_string(),
259                version: version.to_string(),
260                platform,
261                size: 0,
262                sha256: sha256.to_string(),
263            },
264        )?;
265        let parent = self.state.current()?;
266        let id = parent.map(|c| c + 1).unwrap_or(1);
267        self.state.append_generation(&GenerationRecord {
268            id,
269            parent,
270            command: format!("vanta add {tool}@{version}"),
271            reason: "add".to_string(),
272            tools: vec![(tool.to_string(), key.as_str().to_string())],
273        })?;
274        self.state.set_current(id)?;
275        Ok(())
276    }
277
278    /// Store keys referenced by the active generation.
279    fn active_store_keys(&self) -> VtaResult<Vec<StoreKey>> {
280        let mut keys = Vec::new();
281        if let Some(current) = self.state.current()? {
282            if let Some(gen) = self.state.get_generation(current)? {
283                for (_, k) in gen.tools {
284                    if let Ok(sk) = StoreKey::new(k) {
285                        keys.push(sk);
286                    }
287                }
288            }
289        }
290        Ok(keys)
291    }
292
293    /// Bundle the active generation's store entries into a portable archive
294    /// (`docs/13-offline.md`). Returns the number of entries written.
295    pub fn bundle_current(&self, out: &Path) -> VtaResult<usize> {
296        let keys = self.active_store_keys()?;
297        let file = fs::File::create(out).map_err(|e| io(out, e))?;
298        let enc = flate2::write::GzEncoder::new(file, flate2::Compression::default());
299        let mut builder = tar::Builder::new(enc);
300        let list = keys
301            .iter()
302            .map(|k| k.as_str())
303            .collect::<Vec<_>>()
304            .join("\n");
305        let mut header = tar::Header::new_gnu();
306        header.set_size(list.len() as u64);
307        header.set_mode(0o644);
308        header.set_cksum();
309        builder
310            .append_data(&mut header, "KEYS", list.as_bytes())
311            .map_err(|e| inst(format!("bundle KEYS: {e}")))?;
312        for key in &keys {
313            let dir = self.store.entry_path(key);
314            if dir.is_dir() {
315                builder
316                    .append_dir_all(key.as_str(), &dir)
317                    .map_err(|e| inst(format!("bundle {key}: {e}")))?;
318            }
319        }
320        let enc = builder
321            .into_inner()
322            .map_err(|e| inst(format!("bundle finalize: {e}")))?;
323        enc.finish()
324            .map_err(|e| inst(format!("bundle gzip: {e}")))?;
325        Ok(keys.len())
326    }
327
328    /// Restore store entries from a bundle, verifying each entry's integrity
329    /// against its content-addressed key. Returns the number newly imported.
330    pub fn restore(&self, bundle: &Path) -> VtaResult<usize> {
331        let file = fs::File::open(bundle).map_err(|e| io(bundle, e))?;
332        let gz = flate2::read::GzDecoder::new(file);
333        let mut archive = tar::Archive::new(gz);
334        let staging = self.store.new_staging()?;
335        archive
336            .unpack(&staging)
337            .map_err(|e| inst(format!("restore unpack: {e}")))?;
338        let keys_txt =
339            fs::read_to_string(staging.join("KEYS")).map_err(|e| io(&staging.join("KEYS"), e))?;
340        let mut restored = 0;
341        for line in keys_txt.lines() {
342            let key = line.trim();
343            if key.is_empty() {
344                continue;
345            }
346            // `StoreKey::new` enforces the fixed-width lowercase-hex shape (M7),
347            // so `staging.join(key)` below cannot traverse out of staging.
348            let sk = StoreKey::new(key)?;
349            let dst = self.store.entry_path(&sk);
350            if dst.exists() {
351                // Already present (and immutable + verified at insert); nothing
352                // to import for this key.
353                continue;
354            }
355            let src = staging.join(key);
356            if !src.is_dir() {
357                continue;
358            }
359            // Audit H3: verify the staged subtree hashes to its claimed key
360            // BEFORE publishing it into the canonical store. A bundle whose
361            // contents do not match the `blake3-<hash>` dir name is rejected and
362            // the store is left unchanged (the staging dir is removed below).
363            let actual = vanta_store::hash_tree(&src)?;
364            if actual != sk.as_str() {
365                let _ = fs::remove_dir_all(&staging);
366                return Err(VtaError::new(
367                    Area::Vrf,
368                    1,
369                    format!("bundled entry {key} failed integrity verification (content mismatch)"),
370                ));
371            }
372            // Bundled entries are read-only; add write so the dir can be moved.
373            let _ = vanta_store::ensure_writable(&src);
374            fs::rename(&src, &dst).map_err(|e| io(&dst, e))?;
375            restored += 1;
376        }
377        let _ = fs::remove_dir_all(&staging);
378        Ok(restored)
379    }
380
381    /// Remove a tool: record a new generation without it and unlink its primary
382    /// executable. Returns whether the tool was present.
383    pub fn remove(&self, tool: &str) -> VtaResult<bool> {
384        let current = match self.state.current()? {
385            Some(c) => c,
386            None => return Ok(false),
387        };
388        let gen = match self.state.get_generation(current)? {
389            Some(g) => g,
390            None => return Ok(false),
391        };
392        if !gen.tools.iter().any(|(t, _)| t == tool) {
393            return Ok(false);
394        }
395        let tools: Vec<(String, String)> = gen
396            .tools
397            .iter()
398            .filter(|(t, _)| t != tool)
399            .cloned()
400            .collect();
401        let id = current + 1;
402        self.state.append_generation(&GenerationRecord {
403            id,
404            parent: Some(current),
405            command: format!("vanta remove {tool}"),
406            reason: "remove".to_string(),
407            tools,
408        })?;
409        self.state.set_current(id)?;
410        let _ = fs::remove_file(self.home.join("bin").join(tool));
411        Ok(true)
412    }
413}
414
415fn inst(msg: String) -> VtaError {
416    VtaError::new(Area::Inst, 1, msg)
417}
418
419/// Compile a source tree into an install prefix using a signed [`BuildRecipe`].
420///
421/// Trust model: the recipe comes from the root-signed registry index (not the
422/// fetched source, not the user's environment), so the *commands* are trusted
423/// input; the *source bytes* were checksum-verified before extraction. Each
424/// step runs in `src_dir` with `{prefix}`/`{jobs}` substituted and `PREFIX`
425/// exported; the tool's compiler/`make` are resolved from the host `PATH`
426/// (a documented system dependency, like Homebrew/asdf source builds). Build
427/// output streams to the user's terminal. A non-zero exit aborts the install
428/// fail-closed, so a broken build never publishes a partial tree.
429fn run_build(
430    recipe: &BuildRecipe,
431    src_dir: &Path,
432    prefix: &Path,
433    tool: &str,
434    version: &str,
435    reporter: &dyn Reporter,
436) -> VtaResult<()> {
437    // Autotools builds expect the prefix to exist.
438    fs::create_dir_all(prefix).map_err(|e| io(prefix, e))?;
439    let prefix_str = prefix.to_string_lossy().to_string();
440    let jobs = std::thread::available_parallelism()
441        .map(|n| n.get())
442        .unwrap_or(1)
443        .to_string();
444
445    for (i, step) in recipe.steps.iter().enumerate() {
446        let Some((prog, rest)) = step.split_first() else {
447            continue; // skip an empty step defensively
448        };
449        let args: Vec<String> = rest
450            .iter()
451            .map(|a| a.replace("{prefix}", &prefix_str).replace("{jobs}", &jobs))
452            .collect();
453        let prog = prog
454            .replace("{prefix}", &prefix_str)
455            .replace("{jobs}", &jobs);
456
457        reporter.phase(&format!(
458            "building {tool} {version} [{}/{}]: {prog}",
459            i + 1,
460            recipe.steps.len()
461        ));
462        let status = std::process::Command::new(&prog)
463            .args(&args)
464            .current_dir(src_dir)
465            .env("PREFIX", &prefix_str)
466            .status()
467            .map_err(|e| {
468                VtaError::new(
469                    Area::Inst,
470                    4,
471                    format!("source build of `{tool} {version}`: cannot start `{prog}`: {e}"),
472                )
473            })?;
474        if !status.success() {
475            return Err(VtaError::new(
476                Area::Inst,
477                4,
478                format!(
479                    "source build of `{tool} {version}`: step `{prog}` failed ({status}); \
480                     ensure the required build toolchain (C compiler, make, …) is installed"
481                ),
482            ));
483        }
484    }
485
486    // Sanity: a build that installed nothing into the prefix is a failure, not
487    // a silent empty publish.
488    let populated = fs::read_dir(prefix)
489        .map(|mut d| d.next().is_some())
490        .unwrap_or(false);
491    if !populated {
492        return Err(VtaError::new(
493            Area::Inst,
494            4,
495            format!("source build of `{tool} {version}`: recipe produced an empty install prefix"),
496        ));
497    }
498    Ok(())
499}
500
501/// Materialize an artifact's bytes into `dest` according to its archive kind,
502/// stripping `strip` leading path components (the provider's layout).
503/// `max_decompressed` caps the total decompressed bytes (audit M8).
504pub fn extract(
505    archive: &str,
506    src: &Path,
507    dest: &Path,
508    raw_name: &str,
509    strip: u32,
510    max_decompressed: u64,
511) -> VtaResult<()> {
512    match archive {
513        "tar.gz" | "tgz" => extract_targz(src, dest, strip, max_decompressed),
514        "tar.xz" | "txz" => extract_tarxz(src, dest, strip, max_decompressed),
515        "zip" => extract_zip(src, dest, strip, max_decompressed),
516        "raw" => {
517            fs::create_dir_all(dest).map_err(|e| io(dest, e))?;
518            let out = dest.join(raw_name);
519            fs::copy(src, &out).map_err(|e| io(&out, e))?;
520            set_executable(&out);
521            Ok(())
522        }
523        other => Err(VtaError::new(
524            Area::Inst,
525            3,
526            format!(
527                "unsupported archive kind `{other}` (supported: tar.gz, tgz, tar.xz, txz, zip, raw)"
528            ),
529        )),
530    }
531}
532
533/// Extract a `.zip` under the same security model as the tar path: path
534/// traversal rejected (zip-slip), a shared decompressed-bytes budget across all
535/// entries (M8), link targets validated before creation (M5), and
536/// setuid/setgid/sticky stripped from materialized modes (M5).
537fn extract_zip(src: &Path, dest: &Path, strip: u32, max_decompressed: u64) -> VtaResult<()> {
538    use std::path::PathBuf;
539    let file = fs::File::open(src).map_err(|e| io(src, e))?;
540    let mut archive = zip::ZipArchive::new(file)
541        .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading zip archive: {e}")))?;
542    fs::create_dir_all(dest).map_err(|e| io(dest, e))?;
543    let dest_canon = dest.canonicalize().map_err(|e| io(dest, e))?;
544    // M8: one budget across all entries, so many mid-size entries cannot
545    // multiply past the ceiling any more than one huge entry can.
546    let mut budget = max_decompressed;
547
548    for i in 0..archive.len() {
549        let mut entry = archive
550            .by_index(i)
551            .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading zip entry: {e}")))?;
552        // `enclosed_name` refuses absolute paths and any `..` component.
553        let Some(path) = entry.enclosed_name() else {
554            return Err(traversal());
555        };
556        let stripped: PathBuf = path.components().skip(strip as usize).collect();
557        if stripped.as_os_str().is_empty() {
558            continue;
559        }
560        if escapes(&stripped) {
561            return Err(traversal());
562        }
563        let out = dest.join(&stripped);
564
565        if entry.is_dir() {
566            fs::create_dir_all(&out).map_err(|e| io(&out, e))?;
567            continue;
568        }
569
570        if let Some(parent) = out.parent() {
571            fs::create_dir_all(parent).map_err(|e| io(parent, e))?;
572            // M5: realpath of the parent must stay under the staging root
573            // (defeats symlinked ancestors created by earlier entries).
574            let parent_canon = parent.canonicalize().map_err(|e| io(parent, e))?;
575            if !parent_canon.starts_with(&dest_canon) {
576                return Err(traversal());
577            }
578        }
579
580        let mode = entry.unix_mode();
581        // M5: symlink entries (mode S_IFLNK) carry the target as file content.
582        // Validate the target exactly like the tar path before creating.
583        if mode.is_some_and(|m| m & 0o170000 == 0o120000) {
584            let mut target = String::new();
585            LimitReader::new(&mut entry, 4096)
586                .read_to_string(&mut target)
587                .map_err(|e| VtaError::new(Area::Inst, 1, format!("zip link target: {e}")))?;
588            let target_path = Path::new(&target);
589            let base = stripped.parent().unwrap_or_else(|| Path::new(""));
590            if link_target_escapes(base, target_path) {
591                return Err(VtaError::new(
592                    Area::Inst,
593                    1,
594                    format!(
595                        "archive link entry `{}` has an unsafe target `{target}` (rejected)",
596                        stripped.display()
597                    ),
598                ));
599            }
600            #[cfg(unix)]
601            std::os::unix::fs::symlink(target_path, &out).map_err(|e| io(&out, e))?;
602            // Non-unix: skip symlinks (tool zips for windows do not rely on them).
603            continue;
604        }
605
606        let mut writer = fs::File::create(&out).map_err(|e| io(&out, e))?;
607        let mut limited = LimitReader::new(&mut entry, budget);
608        let copied = std::io::copy(&mut limited, &mut writer)
609            .map_err(|e| VtaError::new(Area::Inst, 1, format!("unpacking zip entry: {e}")))?;
610        budget = budget.saturating_sub(copied);
611
612        // Apply the entry's permission bits sans special bits (M5); default to
613        // 0644 when the zip carries no unix modes (created on Windows).
614        #[cfg(unix)]
615        {
616            use std::os::unix::fs::PermissionsExt;
617            let safe = mode.map(|m| m & 0o777).unwrap_or(0o644);
618            let _ = fs::set_permissions(&out, fs::Permissions::from_mode(safe));
619        }
620        strip_special_bits(&out);
621    }
622    Ok(())
623}
624
625fn extract_targz(src: &Path, dest: &Path, strip: u32, max_decompressed: u64) -> VtaResult<()> {
626    let file = fs::File::open(src).map_err(|e| io(src, e))?;
627    // M8: bound total decompressed bytes so a gzip bomb aborts rather than
628    // filling the disk.
629    let gz = LimitReader::new(flate2::read::GzDecoder::new(file), max_decompressed);
630    extract_tar(gz, dest, strip)
631}
632
633fn extract_tarxz(src: &Path, dest: &Path, strip: u32, max_decompressed: u64) -> VtaResult<()> {
634    let file = fs::File::open(src).map_err(|e| io(src, e))?;
635    // Same M8 decompression-bomb bound as the gzip path.
636    let xz = LimitReader::new(xz2::read::XzDecoder::new(file), max_decompressed);
637    extract_tar(xz, dest, strip)
638}
639
640/// Walk a decompressed tar stream and materialize it under `dest`, stripping
641/// `strip` leading components. Shared by the gzip and xz paths; the caller wraps
642/// the decoder in a [`LimitReader`] to enforce the decompression budget (M8).
643fn extract_tar<R: Read>(reader: R, dest: &Path, strip: u32) -> VtaResult<()> {
644    use std::path::PathBuf;
645    let mut archive = tar::Archive::new(reader);
646    // We re-apply a sanitized mode after unpack (M5: strip setuid/setgid), so we
647    // do not need tar to preserve raw permission bits.
648    archive.set_preserve_permissions(true);
649    let dest_canon = dest.canonicalize().map_err(|e| io(dest, e))?;
650    let entries = archive
651        .entries()
652        .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading archive: {e}")))?;
653    for entry in entries {
654        let mut entry = entry
655            .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading archive entry: {e}")))?;
656        let entry_type = entry.header().entry_type();
657        let path = entry
658            .path()
659            .map_err(|e| VtaError::new(Area::Inst, 1, format!("entry path: {e}")))?
660            .into_owned();
661        let stripped: PathBuf = path.components().skip(strip as usize).collect();
662        if stripped.as_os_str().is_empty() {
663            continue;
664        }
665        // Reject anything that could escape the destination (zip-slip / traversal).
666        if escapes(&stripped) {
667            return Err(traversal());
668        }
669        // M5: link entries (symlink/hardlink) get an extra check — their
670        // *target*, resolved against the link's own directory, must not escape
671        // the staging tree. `entry.unpack` would otherwise create a link
672        // pointing outside, which a later entry could write through. Internal
673        // `..` that resolves back inside the tree is allowed (e.g. node's
674        // `bin/corepack -> ../lib/node_modules/...`). A symlink target is
675        // relative to the link's directory; a tar hardlink target is relative
676        // to the archive root.
677        if matches!(entry_type, tar::EntryType::Symlink | tar::EntryType::Link) {
678            let target = entry
679                .link_name()
680                .map_err(|e| VtaError::new(Area::Inst, 1, format!("link target: {e}")))?
681                .map(|c| c.into_owned())
682                .unwrap_or_default();
683            let base = if entry_type == tar::EntryType::Symlink {
684                stripped.parent().unwrap_or_else(|| Path::new(""))
685            } else {
686                Path::new("")
687            };
688            if link_target_escapes(base, &target) {
689                return Err(VtaError::new(
690                    Area::Inst,
691                    1,
692                    format!(
693                        "archive link entry `{}` has an unsafe target `{}` (rejected)",
694                        stripped.display(),
695                        target.display()
696                    ),
697                ));
698            }
699        }
700        let out = dest.join(&stripped);
701        if let Some(parent) = out.parent() {
702            fs::create_dir_all(parent).map_err(|e| io(parent, e))?;
703            // M5: after the parent exists, canonicalize it and confirm the
704            // realpath still lies under the staging root (defeats symlinked
705            // ancestors pointing elsewhere).
706            let parent_canon = parent.canonicalize().map_err(|e| io(parent, e))?;
707            if !parent_canon.starts_with(&dest_canon) {
708                return Err(traversal());
709            }
710        }
711        entry
712            .unpack(&out)
713            .map_err(|e| VtaError::new(Area::Inst, 1, format!("unpacking entry: {e}")))?;
714        // M5: strip setuid/setgid/sticky bits from materialized files.
715        strip_special_bits(&out);
716    }
717    Ok(())
718}
719
/// Decide whether a link `target` would point outside the extraction root
/// when interpreted relative to `base_dir` — the directory the link lives in,
/// or the empty path for archive-root-relative hardlink targets.
///
/// The check is purely lexical (no filesystem access). The target escapes if
/// it is absolute, contains a root or prefix component, or has a `..` that
/// climbs above the root. A `..` that resolves back inside the tree (e.g.
/// node's `bin/corepack -> ../lib/node_modules/...`) is permitted.
fn link_target_escapes(base_dir: &Path, target: &Path) -> bool {
    use std::path::Component;
    if target.is_absolute() {
        return true;
    }
    // Depth of the link's directory below the root; other component kinds in
    // `base_dir` do not move it.
    let start: i64 = base_dir
        .components()
        .map(|part| match part {
            Component::Normal(_) => 1_i64,
            Component::ParentDir => -1_i64,
            _ => 0_i64,
        })
        .sum();
    // Walk the target one component at a time; `None` means it escaped.
    let landed = target.components().try_fold(start, |level, part| match part {
        Component::Normal(_) => Some(level + 1),
        Component::CurDir => Some(level),
        // Stepping up is fine only while still strictly below the root.
        Component::ParentDir if level > 0 => Some(level - 1),
        Component::ParentDir | Component::RootDir | Component::Prefix(_) => None,
    });
    landed.map_or(true, |level| level < 0)
}
754
/// Reports whether `p` contains any component that would take it outside the
/// directory it is joined onto: a `..`, a root, or a platform prefix.
/// `.` and ordinary names are harmless.
fn escapes(p: &Path) -> bool {
    use std::path::Component;
    for part in p.components() {
        match part {
            Component::ParentDir | Component::RootDir | Component::Prefix(_) => return true,
            Component::CurDir | Component::Normal(_) => {}
        }
    }
    false
}
765
766fn traversal() -> VtaError {
767    VtaError::new(
768        Area::Inst,
769        1,
770        "archive entry escapes destination (path traversal rejected)".to_string(),
771    )
772}
773
/// Wraps a reader with a byte budget: once the bytes read through it would
/// exceed `limit`, `read` fails instead of handing data on (audit M8).
struct LimitReader<R> {
    /// The wrapped reader.
    inner: R,
    /// Bytes that may still be read before the budget is exhausted.
    remaining: u64,
}

impl<R> LimitReader<R> {
    /// Wraps `inner`, allowing at most `limit` bytes to be read through it.
    fn new(inner: R, limit: u64) -> Self {
        Self {
            inner,
            remaining: limit,
        }
    }
}

impl<R: Read> Read for LimitReader<R> {
    /// Reads from the wrapped reader and charges the byte count against the
    /// budget. A read that would overdraw the budget is turned into an
    /// `InvalidData` error and leaves the budget untouched.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let got = self.inner.read(buf)?;
        // `checked_sub` is `None` exactly when this read exceeds what is left.
        match self.remaining.checked_sub(got as u64) {
            Some(left) => {
                self.remaining = left;
                Ok(got)
            }
            None => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "decompressed size exceeds configured maximum (possible decompression bomb)",
            )),
        }
    }
}
803
/// Strip setuid/setgid/sticky bits from a materialized path (audit M5).
///
/// Best-effort: a path whose metadata cannot be read, or whose permissions
/// cannot be changed, is left as it is. Symlinks are skipped without being
/// followed. Permissions are rewritten only when one of the special bits is
/// actually set, so ordinary entries cost no extra `chmod`.
#[cfg(unix)]
fn strip_special_bits(path: &Path) {
    use std::os::unix::fs::PermissionsExt;
    // setuid | setgid | sticky
    const SPECIAL_BITS: u32 = 0o7000;
    // plain rwx bits for user/group/other
    const PERMISSION_BITS: u32 = 0o777;

    let meta = match fs::symlink_metadata(path) {
        Ok(meta) => meta,
        Err(_) => return,
    };
    // Symlinks carry no meaningful permission bits; skip (and avoid following).
    if meta.file_type().is_symlink() {
        return;
    }
    let mut perms = meta.permissions();
    let mode = perms.mode();
    // The mode read from metadata also carries file-type bits, so comparing it
    // with `mode & 0o777` would always differ. Test the special bits directly.
    if mode & SPECIAL_BITS != 0 {
        perms.set_mode(mode & PERMISSION_BITS);
        let _ = fs::set_permissions(path, perms);
    }
}

/// No-op on platforms without Unix permission bits.
#[cfg(not(unix))]
fn strip_special_bits(_path: &Path) {}
825
/// Returns the text after the last `/` or `\` in `p`, or all of `p` when it
/// contains neither separator. A trailing separator yields an empty string.
fn basename(p: &str) -> String {
    let tail = match p.rfind(|c: char| c == '/' || c == '\\') {
        // Both separators are single-byte, so `idx + 1` is a char boundary.
        Some(idx) => &p[idx + 1..],
        None => p,
    };
    tail.to_owned()
}
829
/// ORs `0o755` into the mode of `path`, so the file becomes at least
/// `rwxr-xr-x`. Best-effort: failure to read metadata or to apply the new
/// permissions is ignored.
#[cfg(unix)]
fn set_executable(path: &Path) {
    use std::os::unix::fs::PermissionsExt;
    let meta = match fs::metadata(path) {
        Ok(meta) => meta,
        Err(_) => return,
    };
    let mut perms = meta.permissions();
    let widened = perms.mode() | 0o755;
    perms.set_mode(widened);
    let _ = fs::set_permissions(path, perms);
}

/// No-op on platforms without Unix permission bits.
#[cfg(not(unix))]
fn set_executable(_path: &Path) {}
842
843fn io(path: &Path, e: std::io::Error) -> VtaError {
844    VtaError::new(Area::Inst, 2, format!("{}: {e}", path.display()))
845}
846
847#[cfg(test)]
848mod tests {
849    use super::*;
850
851    fn home(tag: &str) -> PathBuf {
852        let p = std::env::temp_dir().join(format!("vanta-install-{}-{}", tag, std::process::id()));
853        let _ = fs::remove_dir_all(&p);
854        p
855    }
856
857    #[test]
858    fn engine_opens_and_creates_state() {
859        let h = home("open");
860        let e = Engine::open(&h).unwrap();
861        assert_eq!(
862            e.state().schema_version().unwrap(),
863            vanta_state::SCHEMA_VERSION
864        );
865        let _ = fs::remove_dir_all(&h);
866    }
867
868    #[test]
869    fn extracts_targz_then_publishes() {
870        use flate2::write::GzEncoder;
871        use flate2::Compression;
872
873        // Build a small .tar.gz in memory: one file `bin/tool`.
874        let mut builder = tar::Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
875        let mut header = tar::Header::new_gnu();
876        let payload = b"#!/bin/sh\necho hi\n";
877        header.set_size(payload.len() as u64);
878        header.set_mode(0o755);
879        header.set_cksum();
880        builder
881            .append_data(&mut header, "bin/tool", &payload[..])
882            .unwrap();
883        let gz = builder.into_inner().unwrap();
884        let bytes = gz.finish().unwrap();
885
886        let h = home("targz");
887        let store = Store::open(&h).unwrap();
888        let archive_path = store.downloads_dir().join("a.tar.gz");
889        fs::write(&archive_path, &bytes).unwrap();
890
891        let staging = store.new_staging().unwrap();
892        extract(
893            "tar.gz",
894            &archive_path,
895            &staging,
896            "tool",
897            0,
898            DEFAULT_MAX_DECOMPRESSED,
899        )
900        .unwrap();
901        assert!(staging.join("bin/tool").exists());
902
903        let key = store.publish_tree(&staging).unwrap();
904        assert!(store.has(&key));
905        assert!(store.verify_entry(&key).unwrap());
906        let _ = fs::remove_dir_all(&h);
907    }
908
909    #[test]
910    fn extracts_tarxz() {
911        use std::io::Write;
912        // Build an .tar.xz in memory: one file bin/tool.
913        let mut tar_bytes = Vec::new();
914        {
915            let mut b = tar::Builder::new(&mut tar_bytes);
916            let payload = b"#!/bin/sh\necho xz\n";
917            let mut h = tar::Header::new_gnu();
918            h.set_size(payload.len() as u64);
919            h.set_mode(0o755);
920            h.set_cksum();
921            b.append_data(&mut h, "pkg-1.0/bin/tool", &payload[..])
922                .unwrap();
923            b.finish().unwrap();
924        }
925        let mut enc = xz2::write::XzEncoder::new(Vec::new(), 6);
926        enc.write_all(&tar_bytes).unwrap();
927        let xz = enc.finish().unwrap();
928
929        let h = home("tarxz");
930        let store = Store::open(&h).unwrap();
931        let archive_path = store.downloads_dir().join("a.tar.xz");
932        fs::write(&archive_path, &xz).unwrap();
933        let staging = store.new_staging().unwrap();
934        extract(
935            "tar.xz",
936            &archive_path,
937            &staging,
938            "tool",
939            1,
940            DEFAULT_MAX_DECOMPRESSED,
941        )
942        .unwrap();
943        assert!(staging.join("bin/tool").exists());
944        let _ = fs::remove_dir_all(&h);
945    }
946
947    #[test]
948    fn source_build_runs_recipe_into_prefix() {
949        let src = home("bld-src");
950        fs::create_dir_all(&src).unwrap();
951        let prefix = home("bld-pfx");
952        // A tiny two-step recipe exercising {prefix} substitution + ordering.
953        let recipe = BuildRecipe {
954            steps: vec![
955                vec!["sh".into(), "-c".into(), "mkdir -p {prefix}/bin".into()],
956                vec![
957                    "sh".into(),
958                    "-c".into(),
959                    "printf '#!/bin/sh\\necho hi\\n' > {prefix}/bin/tool && chmod +x {prefix}/bin/tool"
960                        .into(),
961                ],
962            ],
963        };
964        run_build(&recipe, &src, &prefix, "tool", "1.0", &()).unwrap();
965        let bin = prefix.join("bin/tool");
966        assert!(bin.exists(), "recipe should install bin/tool");
967        let out = std::process::Command::new(&bin).output().unwrap();
968        assert_eq!(String::from_utf8_lossy(&out.stdout).trim(), "hi");
969        let _ = fs::remove_dir_all(&src);
970        let _ = fs::remove_dir_all(&prefix);
971    }
972
973    #[test]
974    fn source_build_failing_step_aborts() {
975        let src = home("bld-fail-src");
976        fs::create_dir_all(&src).unwrap();
977        let prefix = home("bld-fail-pfx");
978        let recipe = BuildRecipe {
979            steps: vec![vec!["sh".into(), "-c".into(), "exit 3".into()]],
980        };
981        let err = run_build(&recipe, &src, &prefix, "tool", "1.0", &()).unwrap_err();
982        assert!(err.to_string().contains("failed"), "{err}");
983        let _ = fs::remove_dir_all(&src);
984        let _ = fs::remove_dir_all(&prefix);
985    }
986
987    #[test]
988    fn source_build_empty_prefix_rejected() {
989        let src = home("bld-empty-src");
990        fs::create_dir_all(&src).unwrap();
991        let prefix = home("bld-empty-pfx");
992        // Recipe "succeeds" but installs nothing → must not publish empty tree.
993        let recipe = BuildRecipe {
994            steps: vec![vec!["sh".into(), "-c".into(), "true".into()]],
995        };
996        let err = run_build(&recipe, &src, &prefix, "tool", "1.0", &()).unwrap_err();
997        assert!(err.to_string().contains("empty install prefix"), "{err}");
998        let _ = fs::remove_dir_all(&src);
999        let _ = fs::remove_dir_all(&prefix);
1000    }
1001
1002    #[test]
1003    fn link_target_guard_allows_internal_dotdot_rejects_escape() {
1004        use std::path::Path;
1005        // node's real symlink: bin/corepack -> ../lib/node_modules/... stays in.
1006        assert!(!link_target_escapes(
1007            Path::new("bin"),
1008            Path::new("../lib/node_modules/corepack/dist/corepack.js")
1009        ));
1010        // sibling reference stays in.
1011        assert!(!link_target_escapes(Path::new("bin"), Path::new("node")));
1012        // pops above root → escape.
1013        assert!(link_target_escapes(
1014            Path::new("bin"),
1015            Path::new("../../etc/passwd")
1016        ));
1017        // absolute → escape.
1018        assert!(link_target_escapes(
1019            Path::new("bin"),
1020            Path::new("/etc/passwd")
1021        ));
1022        // hardlink base is root: a leading `..` escapes immediately.
1023        assert!(link_target_escapes(Path::new(""), Path::new("../x")));
1024        assert!(!link_target_escapes(Path::new(""), Path::new("bin/node")));
1025    }
1026
1027    /// Build an in-memory zip: (path, mode, payload) triples.
1028    fn make_zip(entries: &[(&str, u32, &[u8])]) -> Vec<u8> {
1029        use std::io::Write;
1030        let mut w = zip::ZipWriter::new(std::io::Cursor::new(Vec::new()));
1031        for (name, mode, payload) in entries {
1032            let opts = zip::write::SimpleFileOptions::default().unix_permissions(*mode);
1033            w.start_file(*name, opts).unwrap();
1034            w.write_all(payload).unwrap();
1035        }
1036        w.finish().unwrap().into_inner()
1037    }
1038
1039    #[test]
1040    fn extracts_zip_with_strip_and_modes() {
1041        let h = home("zip");
1042        let store = Store::open(&h).unwrap();
1043        let bytes = make_zip(&[
1044            ("terraform_1.9.0/terraform", 0o755, b"#!/bin/sh\necho tf\n"),
1045            ("terraform_1.9.0/README.md", 0o644, b"docs"),
1046        ]);
1047        let archive_path = store.downloads_dir().join("a.zip");
1048        fs::write(&archive_path, &bytes).unwrap();
1049
1050        let staging = store.new_staging().unwrap();
1051        extract(
1052            "zip",
1053            &archive_path,
1054            &staging,
1055            "terraform",
1056            1,
1057            DEFAULT_MAX_DECOMPRESSED,
1058        )
1059        .unwrap();
1060        let bin = staging.join("terraform");
1061        assert!(bin.exists());
1062        assert!(staging.join("README.md").exists());
1063        #[cfg(unix)]
1064        {
1065            use std::os::unix::fs::PermissionsExt;
1066            let mode = fs::metadata(&bin).unwrap().permissions().mode();
1067            assert_eq!(mode & 0o777, 0o755, "exec bit preserved from zip modes");
1068        }
1069        let _ = fs::remove_dir_all(&h);
1070    }
1071
1072    #[test]
1073    fn zip_slip_rejected() {
1074        let h = home("zipslip");
1075        let store = Store::open(&h).unwrap();
1076        let bytes = make_zip(&[("../evil", 0o644, b"pwn")]);
1077        let archive_path = store.downloads_dir().join("evil.zip");
1078        fs::write(&archive_path, &bytes).unwrap();
1079
1080        let staging = store.new_staging().unwrap();
1081        let err = extract(
1082            "zip",
1083            &archive_path,
1084            &staging,
1085            "evil",
1086            0,
1087            DEFAULT_MAX_DECOMPRESSED,
1088        )
1089        .unwrap_err();
1090        assert!(err.to_string().contains("traversal"), "{err}");
1091        let _ = fs::remove_dir_all(&h);
1092    }
1093
1094    #[test]
1095    fn zip_decompression_budget_enforced() {
1096        let h = home("zipbomb");
1097        let store = Store::open(&h).unwrap();
1098        let big = vec![0u8; 64 * 1024];
1099        let bytes = make_zip(&[("big.bin", 0o644, &big[..])]);
1100        let archive_path = store.downloads_dir().join("big.zip");
1101        fs::write(&archive_path, &bytes).unwrap();
1102
1103        let staging = store.new_staging().unwrap();
1104        // Budget below the decompressed size must abort, not fill the disk.
1105        let err = extract("zip", &archive_path, &staging, "big", 0, 1024).unwrap_err();
1106        assert!(err.to_string().contains("decompress"), "{err}");
1107        let _ = fs::remove_dir_all(&h);
1108    }
1109
1110    #[test]
1111    fn rejects_unsupported_archive() {
1112        let err = extract(
1113            "tar.xz",
1114            Path::new("/x"),
1115            Path::new("/y"),
1116            "t",
1117            0,
1118            DEFAULT_MAX_DECOMPRESSED,
1119        )
1120        .unwrap_err();
1121        assert_eq!(err.area, Area::Inst);
1122    }
1123
1124    // M5: an archive containing a symlink whose target escapes the tree (here an
1125    // absolute path), followed by a write through that link, must be rejected.
1126    #[test]
1127    fn rejects_symlink_escape_archive() {
1128        use flate2::write::GzEncoder;
1129        use flate2::Compression;
1130
1131        let mut builder = tar::Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
1132        // A symlink `evil` -> `/tmp/escape-target` (absolute).
1133        let mut link = tar::Header::new_gnu();
1134        link.set_entry_type(tar::EntryType::Symlink);
1135        link.set_size(0);
1136        link.set_mode(0o777);
1137        builder
1138            .append_link(&mut link, "evil", "/tmp/vanta-escape-target")
1139            .unwrap();
1140        // A regular write through the link path.
1141        let payload = b"pwned";
1142        let mut f = tar::Header::new_gnu();
1143        f.set_size(payload.len() as u64);
1144        f.set_mode(0o644);
1145        f.set_cksum();
1146        builder.append_data(&mut f, "evil", &payload[..]).unwrap();
1147        let bytes = builder.into_inner().unwrap().finish().unwrap();
1148
1149        let h = home("symlink");
1150        let store = Store::open(&h).unwrap();
1151        let archive_path = store.downloads_dir().join("evil.tar.gz");
1152        fs::write(&archive_path, &bytes).unwrap();
1153        let staging = store.new_staging().unwrap();
1154        let err = extract(
1155            "tar.gz",
1156            &archive_path,
1157            &staging,
1158            "tool",
1159            0,
1160            DEFAULT_MAX_DECOMPRESSED,
1161        )
1162        .unwrap_err();
1163        assert_eq!(err.area, Area::Inst);
1164        assert!(!Path::new("/tmp/vanta-escape-target").exists());
1165        let _ = fs::remove_dir_all(&h);
1166    }
1167
1168    // M8: a highly compressible archive that decompresses past the cap aborts.
1169    #[test]
1170    fn rejects_decompression_bomb() {
1171        use flate2::write::GzEncoder;
1172        use flate2::Compression;
1173
1174        let mut builder = tar::Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
1175        let big = vec![0u8; 1_000_000]; // 1 MB of zeros, compresses tiny
1176        let mut header = tar::Header::new_gnu();
1177        header.set_size(big.len() as u64);
1178        header.set_mode(0o644);
1179        header.set_cksum();
1180        builder.append_data(&mut header, "big", &big[..]).unwrap();
1181        let bytes = builder.into_inner().unwrap().finish().unwrap();
1182
1183        let h = home("bomb");
1184        let store = Store::open(&h).unwrap();
1185        let archive_path = store.downloads_dir().join("bomb.tar.gz");
1186        fs::write(&archive_path, &bytes).unwrap();
1187        let staging = store.new_staging().unwrap();
1188        // Cap well below the 1 MB payload → extraction must fail.
1189        let err = extract("tar.gz", &archive_path, &staging, "tool", 0, 4096).unwrap_err();
1190        assert_eq!(err.area, Area::Inst);
1191        let _ = fs::remove_dir_all(&h);
1192    }
1193}