Skip to main content

vanta_install/
lib.rs

//! `vanta-install` — the install engine.
//!
//! Drives the lifecycle stages `[4 Fetch]`..`[8 Commit]` (`docs/08-installation.md`)
//! for a resolved artifact: download (mirror-aware, resumable), verify the
//! checksum (fail-closed), materialize (extract) into a staging tree, publish it
//! atomically into the content-addressed store, and record a new generation.
//!
//! The entry point takes a resolved [`Artifact`] (produced by `vanta-resolve`).
//! Supported archive formats: `tar.gz`/`tgz`, `zip`, and `raw`.
10#![forbid(unsafe_code)]
11
12use std::fs;
13use std::io::Read;
14use std::path::{Path, PathBuf};
15use vanta_core::{Area, Artifact, Platform, StoreKey, VtaError, VtaResult};
16use vanta_net::Downloader;
17use vanta_security::Policy;
18use vanta_state::{GenerationRecord, State, StoreEntryMeta};
19use vanta_store::Store;
20
/// Progress observer for an [`Engine::install_artifact_reported`] run.
///
/// A caller (the CLI) implements this to render download bars and phase
/// spinners, so this crate does not need to depend on a UI crate. Every method
/// has an empty default body, so an implementor overrides only what it needs.
pub trait Reporter {
    /// Called right before the fetch stage starts. `total` is the artifact's
    /// declared size in bytes when known (used as the download bar's length).
    #[allow(unused_variables)] // no-op default; the parameter name is kept for implementors
    fn fetch_start(&self, total: Option<u64>) {}

    /// Called each time `n` more bytes have been downloaded.
    #[allow(unused_variables)] // no-op default; the parameter name is kept for implementors
    fn fetch_inc(&self, n: u64) {}

    /// Called when a new post-fetch phase begins (e.g. `"verifying"`,
    /// `"extracting"`).
    #[allow(unused_variables)] // no-op default; the parameter name is kept for implementors
    fn phase(&self, name: &str) {}
}

/// The unit type is the fully silent reporter, used when no progress UI is
/// wired in.
impl Reporter for () {}
43
/// Default cap on the total number of bytes an archive may decompress to
/// (audit M8). An archive that would expand beyond it (e.g. a gzip bomb) makes
/// extraction abort instead of filling the disk. Change it per engine with
/// [`Engine::with_max_decompressed`].
pub const DEFAULT_MAX_DECOMPRESSED: u64 = 2 << 30; // 2 GiB
48
/// The install engine, bound to a `$VANTA_HOME`.
///
/// Constructed by [`Engine::open`] / [`Engine::open_with_policy`], which open
/// the store and the state database under the home directory.
pub struct Engine {
    /// Store opened over the home directory; install results are published
    /// into it.
    store: Store,
    /// State opened from `state.db` inside the home directory; holds store
    /// entry metadata and generations.
    state: State,
    /// Downloader used by the fetch stage.
    downloader: Downloader,
    /// The home directory (`$VANTA_HOME`) this engine was opened over.
    home: PathBuf,
    /// Verification policy (audit H2). When `require_signature` is set, a missing
    /// or untrusted signature is a hard error (fail-closed).
    policy: Policy,
    /// Hard ceiling on decompressed archive bytes (audit M8).
    max_decompressed: u64,
}
61
62impl Engine {
63    /// Open the engine over `home` (`$VANTA_HOME`) with the default (permissive)
64    /// policy — checksum-gated, signatures verified when present. Use
65    /// [`Engine::open_with_policy`] to require signatures.
66    pub fn open(home: impl AsRef<Path>) -> VtaResult<Engine> {
67        Self::open_with_policy(home, Policy::default())
68    }
69
70    /// Open the engine with an explicit verification [`Policy`] (audit H2).
71    pub fn open_with_policy(home: impl AsRef<Path>, policy: Policy) -> VtaResult<Engine> {
72        let home = home.as_ref().to_path_buf();
73        let store = Store::open(&home)?;
74        let state = State::open(&home.join("state.db"))?;
75        let downloader = Downloader::new()?;
76        Ok(Engine {
77            store,
78            state,
79            downloader,
80            home,
81            policy,
82            max_decompressed: DEFAULT_MAX_DECOMPRESSED,
83        })
84    }
85
86    /// Override the decompressed-size ceiling (audit M8).
87    pub fn with_max_decompressed(mut self, max: u64) -> Self {
88        self.max_decompressed = max;
89        self
90    }
91
    /// Borrow the underlying store (for `gc`, `which`, etc.).
    pub fn store(&self) -> &Store {
        &self.store
    }
    /// Borrow the underlying state database handle (for `gc`, `which`, etc.).
    pub fn state(&self) -> &State {
        &self.state
    }
99
100    /// Install one resolved artifact for the current platform, returning its
101    /// store key. Fetch → verify → materialize → publish → commit a generation.
102    /// A store hit short-circuits fetch/verify/materialize.
103    pub fn install_artifact(
104        &self,
105        tool: &str,
106        version: &str,
107        artifact: &Artifact,
108    ) -> VtaResult<StoreKey> {
109        self.install_artifact_reported(tool, version, artifact, &())
110    }
111
112    /// Like [`Engine::install_artifact`], but drives `reporter` with download
113    /// byte counts and phase transitions so a caller can render progress.
114    pub fn install_artifact_reported(
115        &self,
116        tool: &str,
117        version: &str,
118        artifact: &Artifact,
119        reporter: &dyn Reporter,
120    ) -> VtaResult<StoreKey> {
121        // Policy precheck (audit H2): when a signature is required, an artifact
122        // lacking a signature OR a *trusted* signing key (the resolver drops
123        // untrusted keys, audit C1) is refused — fail-closed, before any I/O.
124        let has_trusted_sig = artifact.signature.is_some() && artifact.signature_key.is_some();
125        if self.policy.require_signature && !has_trusted_sig {
126            return Err(VtaError::new(
127                Area::Vrf,
128                3,
129                format!(
130                    "signature required by policy but `{tool} {version}` is unsigned \
131                     or its signing key is not trusted"
132                ),
133            ));
134        }
135
136        // [3 Plan] — if the lock already named a key and it is present, reuse it
137        // ONLY if it still verifies (audit H4): a store hit must not be trusted
138        // blindly, since the entry could have been poisoned (audit H3) or the
139        // lockfile's `store_key` is attacker-influenceable. On mismatch, drop the
140        // bad entry and fall through to a fresh fetch + verify.
141        if let Some(key) = &artifact.store_key {
142            if self.store.has(key) {
143                if self.store.verify_entry(key)? {
144                    self.link_bins(key, &artifact.bin)?;
145                    self.record(tool, version, key, &artifact.checksum.value)?;
146                    return Ok(key.clone());
147                }
148                self.store.remove_entry(key)?;
149            }
150        }
151
152        // [4 Fetch] — cap downloaded bytes at the declared size when known (M8).
153        let dl = self
154            .store
155            .downloads_dir()
156            .join(format!("incoming-{tool}-{}", std::process::id()));
157        let mut urls = vec![artifact.url.clone()];
158        urls.extend(artifact.mirrors.clone());
159        reporter.fetch_start(artifact.size);
160        self.downloader.download_any_with_progress(
161            &urls,
162            &dl,
163            artifact.size,
164            Some(&|n| reporter.fetch_inc(n)),
165        )?;
166
167        // [5 Verify] — fail closed (centralized in vanta-security).
168        reporter.phase("verifying");
169        if let Err(e) =
170            vanta_security::verify_file(&dl, &artifact.checksum.algo, &artifact.checksum.value)
171        {
172            let _ = fs::remove_file(&dl);
173            return Err(e);
174        }
175        // Signature verification when the registry pinned a signature + trusted
176        // key. The key's trust is established upstream (audit C1, in the resolver);
177        // by this point a present `signature_key` is one we trust.
178        if let (Some(sig), Some(key_text)) = (&artifact.signature, &artifact.signature_key) {
179            let key = vanta_security::parse_minisign_pubkey(key_text)?;
180            let bytes = fs::read(&dl).map_err(|e| io(&dl, e))?;
181            if let Err(e) = vanta_security::minisign_verify(&bytes, sig, &key) {
182                let _ = fs::remove_file(&dl);
183                return Err(e);
184            }
185        }
186
187        // [6 Materialize]
188        reporter.phase("extracting");
189        let staging = self.store.new_staging()?;
190        let name = artifact
191            .bin
192            .first()
193            .map(|b| basename(b))
194            .unwrap_or_else(|| tool.to_string());
195        extract(
196            &artifact.archive,
197            &dl,
198            &staging,
199            &name,
200            artifact.strip,
201            self.max_decompressed,
202        )?;
203        let _ = fs::remove_file(&dl);
204
205        // [6 Materialize, cont.] atomic publish into the store.
206        let key = self.store.publish_tree(&staging)?;
207
208        // [7 Link] expose the tool's executables on PATH via ~/.vanta/bin.
209        self.link_bins(&key, &artifact.bin)?;
210
211        // [8 Commit]
212        self.record(tool, version, &key, &artifact.checksum.value)?;
213        Ok(key)
214    }
215
216    /// Link a store entry's declared executables into `~/.vanta/bin` (placed on
217    /// PATH by the shell hook). Per-directory environment views are composed by
218    /// `vanta-env` (`docs/10-environments.md`).
219    fn link_bins(&self, key: &StoreKey, bins: &[String]) -> VtaResult<()> {
220        let bin_dir = self.home.join("bin");
221        fs::create_dir_all(&bin_dir).map_err(|e| io(&bin_dir, e))?;
222        let entry = self.store.entry_path(key);
223        for bin in bins {
224            let src = entry.join(bin);
225            if src.exists() {
226                let dst = bin_dir.join(basename(bin));
227                vanta_store::link_best(&src, &dst)?;
228            }
229        }
230        Ok(())
231    }
232
233    fn record(&self, tool: &str, version: &str, key: &StoreKey, sha256: &str) -> VtaResult<()> {
234        let platform = Platform::current().token();
235        self.state.put_store_entry(
236            key.as_str(),
237            &StoreEntryMeta {
238                tool: tool.to_string(),
239                version: version.to_string(),
240                platform,
241                size: 0,
242                sha256: sha256.to_string(),
243            },
244        )?;
245        let parent = self.state.current()?;
246        let id = parent.map(|c| c + 1).unwrap_or(1);
247        self.state.append_generation(&GenerationRecord {
248            id,
249            parent,
250            command: format!("vanta add {tool}@{version}"),
251            reason: "add".to_string(),
252            tools: vec![(tool.to_string(), key.as_str().to_string())],
253        })?;
254        self.state.set_current(id)?;
255        Ok(())
256    }
257
258    /// Store keys referenced by the active generation.
259    fn active_store_keys(&self) -> VtaResult<Vec<StoreKey>> {
260        let mut keys = Vec::new();
261        if let Some(current) = self.state.current()? {
262            if let Some(gen) = self.state.get_generation(current)? {
263                for (_, k) in gen.tools {
264                    if let Ok(sk) = StoreKey::new(k) {
265                        keys.push(sk);
266                    }
267                }
268            }
269        }
270        Ok(keys)
271    }
272
273    /// Bundle the active generation's store entries into a portable archive
274    /// (`docs/13-offline.md`). Returns the number of entries written.
275    pub fn bundle_current(&self, out: &Path) -> VtaResult<usize> {
276        let keys = self.active_store_keys()?;
277        let file = fs::File::create(out).map_err(|e| io(out, e))?;
278        let enc = flate2::write::GzEncoder::new(file, flate2::Compression::default());
279        let mut builder = tar::Builder::new(enc);
280        let list = keys
281            .iter()
282            .map(|k| k.as_str())
283            .collect::<Vec<_>>()
284            .join("\n");
285        let mut header = tar::Header::new_gnu();
286        header.set_size(list.len() as u64);
287        header.set_mode(0o644);
288        header.set_cksum();
289        builder
290            .append_data(&mut header, "KEYS", list.as_bytes())
291            .map_err(|e| inst(format!("bundle KEYS: {e}")))?;
292        for key in &keys {
293            let dir = self.store.entry_path(key);
294            if dir.is_dir() {
295                builder
296                    .append_dir_all(key.as_str(), &dir)
297                    .map_err(|e| inst(format!("bundle {key}: {e}")))?;
298            }
299        }
300        let enc = builder
301            .into_inner()
302            .map_err(|e| inst(format!("bundle finalize: {e}")))?;
303        enc.finish()
304            .map_err(|e| inst(format!("bundle gzip: {e}")))?;
305        Ok(keys.len())
306    }
307
308    /// Restore store entries from a bundle, verifying each entry's integrity
309    /// against its content-addressed key. Returns the number newly imported.
310    pub fn restore(&self, bundle: &Path) -> VtaResult<usize> {
311        let file = fs::File::open(bundle).map_err(|e| io(bundle, e))?;
312        let gz = flate2::read::GzDecoder::new(file);
313        let mut archive = tar::Archive::new(gz);
314        let staging = self.store.new_staging()?;
315        archive
316            .unpack(&staging)
317            .map_err(|e| inst(format!("restore unpack: {e}")))?;
318        let keys_txt =
319            fs::read_to_string(staging.join("KEYS")).map_err(|e| io(&staging.join("KEYS"), e))?;
320        let mut restored = 0;
321        for line in keys_txt.lines() {
322            let key = line.trim();
323            if key.is_empty() {
324                continue;
325            }
326            // `StoreKey::new` enforces the fixed-width lowercase-hex shape (M7),
327            // so `staging.join(key)` below cannot traverse out of staging.
328            let sk = StoreKey::new(key)?;
329            let dst = self.store.entry_path(&sk);
330            if dst.exists() {
331                // Already present (and immutable + verified at insert); nothing
332                // to import for this key.
333                continue;
334            }
335            let src = staging.join(key);
336            if !src.is_dir() {
337                continue;
338            }
339            // Audit H3: verify the staged subtree hashes to its claimed key
340            // BEFORE publishing it into the canonical store. A bundle whose
341            // contents do not match the `blake3-<hash>` dir name is rejected and
342            // the store is left unchanged (the staging dir is removed below).
343            let actual = vanta_store::hash_tree(&src)?;
344            if actual != sk.as_str() {
345                let _ = fs::remove_dir_all(&staging);
346                return Err(VtaError::new(
347                    Area::Vrf,
348                    1,
349                    format!("bundled entry {key} failed integrity verification (content mismatch)"),
350                ));
351            }
352            // Bundled entries are read-only; add write so the dir can be moved.
353            let _ = vanta_store::ensure_writable(&src);
354            fs::rename(&src, &dst).map_err(|e| io(&dst, e))?;
355            restored += 1;
356        }
357        let _ = fs::remove_dir_all(&staging);
358        Ok(restored)
359    }
360
361    /// Remove a tool: record a new generation without it and unlink its primary
362    /// executable. Returns whether the tool was present.
363    pub fn remove(&self, tool: &str) -> VtaResult<bool> {
364        let current = match self.state.current()? {
365            Some(c) => c,
366            None => return Ok(false),
367        };
368        let gen = match self.state.get_generation(current)? {
369            Some(g) => g,
370            None => return Ok(false),
371        };
372        if !gen.tools.iter().any(|(t, _)| t == tool) {
373            return Ok(false);
374        }
375        let tools: Vec<(String, String)> = gen
376            .tools
377            .iter()
378            .filter(|(t, _)| t != tool)
379            .cloned()
380            .collect();
381        let id = current + 1;
382        self.state.append_generation(&GenerationRecord {
383            id,
384            parent: Some(current),
385            command: format!("vanta remove {tool}"),
386            reason: "remove".to_string(),
387            tools,
388        })?;
389        self.state.set_current(id)?;
390        let _ = fs::remove_file(self.home.join("bin").join(tool));
391        Ok(true)
392    }
393}
394
/// Build an install-area error (`Area::Inst`, code 1) carrying `msg`.
fn inst(msg: String) -> VtaError {
    VtaError::new(Area::Inst, 1, msg)
}
398
399/// Materialize an artifact's bytes into `dest` according to its archive kind,
400/// stripping `strip` leading path components (the provider's layout).
401/// `max_decompressed` caps the total decompressed bytes (audit M8).
402pub fn extract(
403    archive: &str,
404    src: &Path,
405    dest: &Path,
406    raw_name: &str,
407    strip: u32,
408    max_decompressed: u64,
409) -> VtaResult<()> {
410    match archive {
411        "tar.gz" | "tgz" => extract_targz(src, dest, strip, max_decompressed),
412        "zip" => extract_zip(src, dest, strip, max_decompressed),
413        "raw" => {
414            fs::create_dir_all(dest).map_err(|e| io(dest, e))?;
415            let out = dest.join(raw_name);
416            fs::copy(src, &out).map_err(|e| io(&out, e))?;
417            set_executable(&out);
418            Ok(())
419        }
420        other => Err(VtaError::new(
421            Area::Inst,
422            3,
423            format!("unsupported archive kind `{other}` (supported: tar.gz, tgz, zip, raw)"),
424        )),
425    }
426}
427
428/// Extract a `.zip` under the same security model as the tar path: path
429/// traversal rejected (zip-slip), a shared decompressed-bytes budget across all
430/// entries (M8), link targets validated before creation (M5), and
431/// setuid/setgid/sticky stripped from materialized modes (M5).
432fn extract_zip(src: &Path, dest: &Path, strip: u32, max_decompressed: u64) -> VtaResult<()> {
433    use std::path::PathBuf;
434    let file = fs::File::open(src).map_err(|e| io(src, e))?;
435    let mut archive = zip::ZipArchive::new(file)
436        .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading zip archive: {e}")))?;
437    fs::create_dir_all(dest).map_err(|e| io(dest, e))?;
438    let dest_canon = dest.canonicalize().map_err(|e| io(dest, e))?;
439    // M8: one budget across all entries, so many mid-size entries cannot
440    // multiply past the ceiling any more than one huge entry can.
441    let mut budget = max_decompressed;
442
443    for i in 0..archive.len() {
444        let mut entry = archive
445            .by_index(i)
446            .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading zip entry: {e}")))?;
447        // `enclosed_name` refuses absolute paths and any `..` component.
448        let Some(path) = entry.enclosed_name() else {
449            return Err(traversal());
450        };
451        let stripped: PathBuf = path.components().skip(strip as usize).collect();
452        if stripped.as_os_str().is_empty() {
453            continue;
454        }
455        if escapes(&stripped) {
456            return Err(traversal());
457        }
458        let out = dest.join(&stripped);
459
460        if entry.is_dir() {
461            fs::create_dir_all(&out).map_err(|e| io(&out, e))?;
462            continue;
463        }
464
465        if let Some(parent) = out.parent() {
466            fs::create_dir_all(parent).map_err(|e| io(parent, e))?;
467            // M5: realpath of the parent must stay under the staging root
468            // (defeats symlinked ancestors created by earlier entries).
469            let parent_canon = parent.canonicalize().map_err(|e| io(parent, e))?;
470            if !parent_canon.starts_with(&dest_canon) {
471                return Err(traversal());
472            }
473        }
474
475        let mode = entry.unix_mode();
476        // M5: symlink entries (mode S_IFLNK) carry the target as file content.
477        // Validate the target exactly like the tar path before creating.
478        if mode.is_some_and(|m| m & 0o170000 == 0o120000) {
479            let mut target = String::new();
480            LimitReader::new(&mut entry, 4096)
481                .read_to_string(&mut target)
482                .map_err(|e| VtaError::new(Area::Inst, 1, format!("zip link target: {e}")))?;
483            let target_path = Path::new(&target);
484            let base = stripped.parent().unwrap_or_else(|| Path::new(""));
485            if link_target_escapes(base, target_path) {
486                return Err(VtaError::new(
487                    Area::Inst,
488                    1,
489                    format!(
490                        "archive link entry `{}` has an unsafe target `{target}` (rejected)",
491                        stripped.display()
492                    ),
493                ));
494            }
495            #[cfg(unix)]
496            std::os::unix::fs::symlink(target_path, &out).map_err(|e| io(&out, e))?;
497            // Non-unix: skip symlinks (tool zips for windows do not rely on them).
498            continue;
499        }
500
501        let mut writer = fs::File::create(&out).map_err(|e| io(&out, e))?;
502        let mut limited = LimitReader::new(&mut entry, budget);
503        let copied = std::io::copy(&mut limited, &mut writer)
504            .map_err(|e| VtaError::new(Area::Inst, 1, format!("unpacking zip entry: {e}")))?;
505        budget = budget.saturating_sub(copied);
506
507        // Apply the entry's permission bits sans special bits (M5); default to
508        // 0644 when the zip carries no unix modes (created on Windows).
509        #[cfg(unix)]
510        {
511            use std::os::unix::fs::PermissionsExt;
512            let safe = mode.map(|m| m & 0o777).unwrap_or(0o644);
513            let _ = fs::set_permissions(&out, fs::Permissions::from_mode(safe));
514        }
515        strip_special_bits(&out);
516    }
517    Ok(())
518}
519
520fn extract_targz(src: &Path, dest: &Path, strip: u32, max_decompressed: u64) -> VtaResult<()> {
521    use std::path::PathBuf;
522    let file = fs::File::open(src).map_err(|e| io(src, e))?;
523    // M8: bound total decompressed bytes so a gzip bomb aborts rather than
524    // filling the disk.
525    let gz = LimitReader::new(flate2::read::GzDecoder::new(file), max_decompressed);
526    let mut archive = tar::Archive::new(gz);
527    // We re-apply a sanitized mode after unpack (M5: strip setuid/setgid), so we
528    // do not need tar to preserve raw permission bits.
529    archive.set_preserve_permissions(true);
530    let dest_canon = dest.canonicalize().map_err(|e| io(dest, e))?;
531    let entries = archive
532        .entries()
533        .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading archive: {e}")))?;
534    for entry in entries {
535        let mut entry = entry
536            .map_err(|e| VtaError::new(Area::Inst, 1, format!("reading archive entry: {e}")))?;
537        let entry_type = entry.header().entry_type();
538        let path = entry
539            .path()
540            .map_err(|e| VtaError::new(Area::Inst, 1, format!("entry path: {e}")))?
541            .into_owned();
542        let stripped: PathBuf = path.components().skip(strip as usize).collect();
543        if stripped.as_os_str().is_empty() {
544            continue;
545        }
546        // Reject anything that could escape the destination (zip-slip / traversal).
547        if escapes(&stripped) {
548            return Err(traversal());
549        }
550        // M5: link entries (symlink/hardlink) get an extra check — their
551        // *target*, resolved against the link's own directory, must not escape
552        // the staging tree. `entry.unpack` would otherwise create a link
553        // pointing outside, which a later entry could write through. Internal
554        // `..` that resolves back inside the tree is allowed (e.g. node's
555        // `bin/corepack -> ../lib/node_modules/...`). A symlink target is
556        // relative to the link's directory; a tar hardlink target is relative
557        // to the archive root.
558        if matches!(entry_type, tar::EntryType::Symlink | tar::EntryType::Link) {
559            let target = entry
560                .link_name()
561                .map_err(|e| VtaError::new(Area::Inst, 1, format!("link target: {e}")))?
562                .map(|c| c.into_owned())
563                .unwrap_or_default();
564            let base = if entry_type == tar::EntryType::Symlink {
565                stripped.parent().unwrap_or_else(|| Path::new(""))
566            } else {
567                Path::new("")
568            };
569            if link_target_escapes(base, &target) {
570                return Err(VtaError::new(
571                    Area::Inst,
572                    1,
573                    format!(
574                        "archive link entry `{}` has an unsafe target `{}` (rejected)",
575                        stripped.display(),
576                        target.display()
577                    ),
578                ));
579            }
580        }
581        let out = dest.join(&stripped);
582        if let Some(parent) = out.parent() {
583            fs::create_dir_all(parent).map_err(|e| io(parent, e))?;
584            // M5: after the parent exists, canonicalize it and confirm the
585            // realpath still lies under the staging root (defeats symlinked
586            // ancestors pointing elsewhere).
587            let parent_canon = parent.canonicalize().map_err(|e| io(parent, e))?;
588            if !parent_canon.starts_with(&dest_canon) {
589                return Err(traversal());
590            }
591        }
592        entry
593            .unpack(&out)
594            .map_err(|e| VtaError::new(Area::Inst, 1, format!("unpacking entry: {e}")))?;
595        // M5: strip setuid/setgid/sticky bits from materialized files.
596        strip_special_bits(&out);
597    }
598    Ok(())
599}
600
/// Whether a link `target`, interpreted relative to `base_dir` (the directory
/// the link lives in — empty for archive-root-relative hardlink targets),
/// leaves the extraction root.
///
/// Lexical only (no filesystem access). Returns `true` when the target is
/// absolute, contains a rooted/prefixed component, or when any `..` — in
/// `base_dir` or in `target` — pops above the root. Internal `..` that resolves
/// back inside the tree (e.g. node's `bin/corepack -> ../lib/node_modules/...`)
/// is permitted.
///
/// Escaping is sticky: once the walk has gone above the root, later normal
/// components cannot bring it back in. Previously a `..` in `base_dir` could be
/// masked by a following component, so `("..", "a")` was reported as safe.
/// Root, prefix and `.` components in `base_dir` are ignored, as before.
fn link_target_escapes(base_dir: &Path, target: &Path) -> bool {
    use std::path::Component;
    if target.is_absolute() {
        return true;
    }
    // Number of directories below the extraction root at this point of the walk.
    let mut depth: usize = 0;
    for c in base_dir.components() {
        match c {
            Component::Normal(_) => depth += 1,
            Component::ParentDir => match depth.checked_sub(1) {
                Some(d) => depth = d,
                None => return true,
            },
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
    }
    for c in target.components() {
        match c {
            Component::Normal(_) => depth += 1,
            Component::CurDir => {}
            Component::ParentDir => match depth.checked_sub(1) {
                Some(d) => depth = d,
                None => return true,
            },
            Component::RootDir | Component::Prefix(_) => return true,
        }
    }
    false
}
635
/// Whether `p` contains any component that could take it outside its base
/// directory: a `..`, a root (`/`), or a platform prefix.
fn escapes(p: &Path) -> bool {
    use std::path::Component::{ParentDir, Prefix, RootDir};
    for part in p.components() {
        match part {
            ParentDir | RootDir | Prefix(_) => return true,
            _ => {}
        }
    }
    false
}
646
647fn traversal() -> VtaError {
648    VtaError::new(
649        Area::Inst,
650        1,
651        "archive entry escapes destination (path traversal rejected)".to_string(),
652    )
653}
654
/// A `Read` adapter that fails once more than `limit` bytes in total have been
/// read from the wrapped reader (audit M8). Reading exactly `limit` bytes is
/// fine.
struct LimitReader<R> {
    /// The wrapped reader.
    inner: R,
    /// Bytes that may still be read before the limit is exceeded.
    remaining: u64,
}

impl<R> LimitReader<R> {
    /// Wrap `inner`, allowing at most `limit` bytes to be read through it.
    fn new(inner: R, limit: u64) -> Self {
        Self {
            inner,
            remaining: limit,
        }
    }
}

impl<R: Read> Read for LimitReader<R> {
    /// Read from the inner reader, charging the bytes against the remaining
    /// allowance. A read that would exceed it yields `InvalidData`; the bytes
    /// of that read are already in `buf`, but the count is not returned.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let got = self.inner.read(buf)?;
        match self.remaining.checked_sub(got as u64) {
            Some(left) => {
                self.remaining = left;
                Ok(got)
            }
            None => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "decompressed size exceeds configured maximum (possible decompression bomb)",
            )),
        }
    }
}
684
/// Clear the setuid/setgid/sticky bits of the file at `path`, keeping only the
/// ordinary `rwx` permission bits (audit M5). Best effort: a path whose
/// metadata cannot be read, or a failed permission change, is ignored.
#[cfg(unix)]
fn strip_special_bits(path: &Path) {
    use std::os::unix::fs::PermissionsExt;
    let Ok(meta) = fs::symlink_metadata(path) else {
        return;
    };
    // Symlinks carry no meaningful permission bits, and changing permissions
    // on one would follow the link; leave them alone.
    if meta.file_type().is_symlink() {
        return;
    }
    let mut perms = meta.permissions();
    let current = perms.mode();
    let safe = current & 0o777; // drop 0o7000 (setuid/setgid/sticky)
    if safe == current {
        return;
    }
    perms.set_mode(safe);
    let _ = fs::set_permissions(path, perms);
}

/// No-op on platforms without unix permission bits.
#[cfg(not(unix))]
fn strip_special_bits(_path: &Path) {}
706
/// The part of `p` after its last `/` or `\` separator, or all of `p` when it
/// contains neither. A path ending in a separator yields an empty string.
fn basename(p: &str) -> String {
    let tail = match p.rfind(|c: char| c == '/' || c == '\\') {
        // Both separators are one byte wide, so `cut + 1` is a char boundary.
        Some(cut) => &p[cut + 1..],
        None => p,
    };
    tail.to_string()
}
710
/// OR `0o755` into the permission bits of the file at `path` so it can be
/// executed. Best effort: unreadable metadata or a failed permission change
/// is ignored.
#[cfg(unix)]
fn set_executable(path: &Path) {
    use std::os::unix::fs::PermissionsExt;
    let Ok(meta) = fs::metadata(path) else {
        return;
    };
    let mut perms = meta.permissions();
    let widened = perms.mode() | 0o755;
    perms.set_mode(widened);
    let _ = fs::set_permissions(path, perms);
}

/// No-op on platforms without unix permission bits.
#[cfg(not(unix))]
fn set_executable(_path: &Path) {}
723
/// Wrap an I/O error as an install-area error (`Area::Inst`, code 2) whose
/// message is `"<path>: <error>"`.
fn io(path: &Path, e: std::io::Error) -> VtaError {
    VtaError::new(Area::Inst, 2, format!("{}: {e}", path.display()))
}
727
728#[cfg(test)]
729mod tests {
730    use super::*;
731
732    fn home(tag: &str) -> PathBuf {
733        let p = std::env::temp_dir().join(format!("vanta-install-{}-{}", tag, std::process::id()));
734        let _ = fs::remove_dir_all(&p);
735        p
736    }
737
738    #[test]
739    fn engine_opens_and_creates_state() {
740        let h = home("open");
741        let e = Engine::open(&h).unwrap();
742        assert_eq!(
743            e.state().schema_version().unwrap(),
744            vanta_state::SCHEMA_VERSION
745        );
746        let _ = fs::remove_dir_all(&h);
747    }
748
749    #[test]
750    fn extracts_targz_then_publishes() {
751        use flate2::write::GzEncoder;
752        use flate2::Compression;
753
754        // Build a small .tar.gz in memory: one file `bin/tool`.
755        let mut builder = tar::Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
756        let mut header = tar::Header::new_gnu();
757        let payload = b"#!/bin/sh\necho hi\n";
758        header.set_size(payload.len() as u64);
759        header.set_mode(0o755);
760        header.set_cksum();
761        builder
762            .append_data(&mut header, "bin/tool", &payload[..])
763            .unwrap();
764        let gz = builder.into_inner().unwrap();
765        let bytes = gz.finish().unwrap();
766
767        let h = home("targz");
768        let store = Store::open(&h).unwrap();
769        let archive_path = store.downloads_dir().join("a.tar.gz");
770        fs::write(&archive_path, &bytes).unwrap();
771
772        let staging = store.new_staging().unwrap();
773        extract(
774            "tar.gz",
775            &archive_path,
776            &staging,
777            "tool",
778            0,
779            DEFAULT_MAX_DECOMPRESSED,
780        )
781        .unwrap();
782        assert!(staging.join("bin/tool").exists());
783
784        let key = store.publish_tree(&staging).unwrap();
785        assert!(store.has(&key));
786        assert!(store.verify_entry(&key).unwrap());
787        let _ = fs::remove_dir_all(&h);
788    }
789
790    #[test]
791    fn link_target_guard_allows_internal_dotdot_rejects_escape() {
792        use std::path::Path;
793        // node's real symlink: bin/corepack -> ../lib/node_modules/... stays in.
794        assert!(!link_target_escapes(
795            Path::new("bin"),
796            Path::new("../lib/node_modules/corepack/dist/corepack.js")
797        ));
798        // sibling reference stays in.
799        assert!(!link_target_escapes(Path::new("bin"), Path::new("node")));
800        // pops above root → escape.
801        assert!(link_target_escapes(
802            Path::new("bin"),
803            Path::new("../../etc/passwd")
804        ));
805        // absolute → escape.
806        assert!(link_target_escapes(
807            Path::new("bin"),
808            Path::new("/etc/passwd")
809        ));
810        // hardlink base is root: a leading `..` escapes immediately.
811        assert!(link_target_escapes(Path::new(""), Path::new("../x")));
812        assert!(!link_target_escapes(Path::new(""), Path::new("bin/node")));
813    }
814
815    /// Build an in-memory zip: (path, mode, payload) triples.
816    fn make_zip(entries: &[(&str, u32, &[u8])]) -> Vec<u8> {
817        use std::io::Write;
818        let mut w = zip::ZipWriter::new(std::io::Cursor::new(Vec::new()));
819        for (name, mode, payload) in entries {
820            let opts = zip::write::SimpleFileOptions::default().unix_permissions(*mode);
821            w.start_file(*name, opts).unwrap();
822            w.write_all(payload).unwrap();
823        }
824        w.finish().unwrap().into_inner()
825    }
826
827    #[test]
828    fn extracts_zip_with_strip_and_modes() {
829        let h = home("zip");
830        let store = Store::open(&h).unwrap();
831        let bytes = make_zip(&[
832            ("terraform_1.9.0/terraform", 0o755, b"#!/bin/sh\necho tf\n"),
833            ("terraform_1.9.0/README.md", 0o644, b"docs"),
834        ]);
835        let archive_path = store.downloads_dir().join("a.zip");
836        fs::write(&archive_path, &bytes).unwrap();
837
838        let staging = store.new_staging().unwrap();
839        extract(
840            "zip",
841            &archive_path,
842            &staging,
843            "terraform",
844            1,
845            DEFAULT_MAX_DECOMPRESSED,
846        )
847        .unwrap();
848        let bin = staging.join("terraform");
849        assert!(bin.exists());
850        assert!(staging.join("README.md").exists());
851        #[cfg(unix)]
852        {
853            use std::os::unix::fs::PermissionsExt;
854            let mode = fs::metadata(&bin).unwrap().permissions().mode();
855            assert_eq!(mode & 0o777, 0o755, "exec bit preserved from zip modes");
856        }
857        let _ = fs::remove_dir_all(&h);
858    }
859
860    #[test]
861    fn zip_slip_rejected() {
862        let h = home("zipslip");
863        let store = Store::open(&h).unwrap();
864        let bytes = make_zip(&[("../evil", 0o644, b"pwn")]);
865        let archive_path = store.downloads_dir().join("evil.zip");
866        fs::write(&archive_path, &bytes).unwrap();
867
868        let staging = store.new_staging().unwrap();
869        let err = extract(
870            "zip",
871            &archive_path,
872            &staging,
873            "evil",
874            0,
875            DEFAULT_MAX_DECOMPRESSED,
876        )
877        .unwrap_err();
878        assert!(err.to_string().contains("traversal"), "{err}");
879        let _ = fs::remove_dir_all(&h);
880    }
881
882    #[test]
883    fn zip_decompression_budget_enforced() {
884        let h = home("zipbomb");
885        let store = Store::open(&h).unwrap();
886        let big = vec![0u8; 64 * 1024];
887        let bytes = make_zip(&[("big.bin", 0o644, &big[..])]);
888        let archive_path = store.downloads_dir().join("big.zip");
889        fs::write(&archive_path, &bytes).unwrap();
890
891        let staging = store.new_staging().unwrap();
892        // Budget below the decompressed size must abort, not fill the disk.
893        let err = extract("zip", &archive_path, &staging, "big", 0, 1024).unwrap_err();
894        assert!(err.to_string().contains("decompress"), "{err}");
895        let _ = fs::remove_dir_all(&h);
896    }
897
898    #[test]
899    fn rejects_unsupported_archive() {
900        let err = extract(
901            "tar.xz",
902            Path::new("/x"),
903            Path::new("/y"),
904            "t",
905            0,
906            DEFAULT_MAX_DECOMPRESSED,
907        )
908        .unwrap_err();
909        assert_eq!(err.area, Area::Inst);
910    }
911
912    // M5: an archive containing a symlink whose target escapes the tree (here an
913    // absolute path), followed by a write through that link, must be rejected.
914    #[test]
915    fn rejects_symlink_escape_archive() {
916        use flate2::write::GzEncoder;
917        use flate2::Compression;
918
919        let mut builder = tar::Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
920        // A symlink `evil` -> `/tmp/escape-target` (absolute).
921        let mut link = tar::Header::new_gnu();
922        link.set_entry_type(tar::EntryType::Symlink);
923        link.set_size(0);
924        link.set_mode(0o777);
925        builder
926            .append_link(&mut link, "evil", "/tmp/vanta-escape-target")
927            .unwrap();
928        // A regular write through the link path.
929        let payload = b"pwned";
930        let mut f = tar::Header::new_gnu();
931        f.set_size(payload.len() as u64);
932        f.set_mode(0o644);
933        f.set_cksum();
934        builder.append_data(&mut f, "evil", &payload[..]).unwrap();
935        let bytes = builder.into_inner().unwrap().finish().unwrap();
936
937        let h = home("symlink");
938        let store = Store::open(&h).unwrap();
939        let archive_path = store.downloads_dir().join("evil.tar.gz");
940        fs::write(&archive_path, &bytes).unwrap();
941        let staging = store.new_staging().unwrap();
942        let err = extract(
943            "tar.gz",
944            &archive_path,
945            &staging,
946            "tool",
947            0,
948            DEFAULT_MAX_DECOMPRESSED,
949        )
950        .unwrap_err();
951        assert_eq!(err.area, Area::Inst);
952        assert!(!Path::new("/tmp/vanta-escape-target").exists());
953        let _ = fs::remove_dir_all(&h);
954    }
955
956    // M8: a highly compressible archive that decompresses past the cap aborts.
957    #[test]
958    fn rejects_decompression_bomb() {
959        use flate2::write::GzEncoder;
960        use flate2::Compression;
961
962        let mut builder = tar::Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
963        let big = vec![0u8; 1_000_000]; // 1 MB of zeros, compresses tiny
964        let mut header = tar::Header::new_gnu();
965        header.set_size(big.len() as u64);
966        header.set_mode(0o644);
967        header.set_cksum();
968        builder.append_data(&mut header, "big", &big[..]).unwrap();
969        let bytes = builder.into_inner().unwrap().finish().unwrap();
970
971        let h = home("bomb");
972        let store = Store::open(&h).unwrap();
973        let archive_path = store.downloads_dir().join("bomb.tar.gz");
974        fs::write(&archive_path, &bytes).unwrap();
975        let staging = store.new_staging().unwrap();
976        // Cap well below the 1 MB payload → extraction must fail.
977        let err = extract("tar.gz", &archive_path, &staging, "tool", 0, 4096).unwrap_err();
978        assert_eq!(err.area, Area::Inst);
979        let _ = fs::remove_dir_all(&h);
980    }
981}