Skip to main content

ant_node/upgrade/
binary_cache.rs

1//! Disk cache for downloaded upgrade archives.
2//!
3//! When multiple ant-node instances detect the same upgrade, only the first
4//! one needs to download the archive. `BinaryCache` stores the **signed
5//! archive together with its detached ML-DSA-65 signature** so that
6//! subsequent nodes can reuse it.
7//!
8//! ## Security model
9//!
10//! The ML-DSA-65 signature is the security gate, and it covers the *archive*
11//! bytes — not the extracted binary. A previous version cached the extracted
12//! binary and, on a cache hit, returned it after only a SHA-256 check against
13//! a sibling metadata file. SHA-256 is not a security control: anyone able to
14//! write to the shared cache directory (a co-located process, a shared
15//! container volume, a low-privilege foothold) could replace the cached
16//! binary and its `.meta.json` with a matching hash, and the next node would
17//! execute it **without any signature verification** — persistent RCE.
18//!
19//! This module now caches the *archive + signature* and, on **every** cache
20//! hit, re-runs ML-DSA-65 verification over the cached archive before it is
21//! used. A tampered archive fails verification (the release key is pinned in
22//! the binary and cannot be forged); a tampered or missing signature fails
23//! likewise. The extracted binary is always derived fresh from the
24//! just-verified archive by the caller, so a poisoned cache entry can never
25//! be executed. The SHA-256 metadata is retained only as a fast corruption
26//! pre-check, never as the trust decision.
27//!
28//! ## Residual: cache entries are not bound to a specific release version
29//!
30//! `signature::SIGNING_CONTEXT = "ant-node-release-v1"` is constant across
31//! versions, so the ML-DSA signature attests to "this archive is a valid
32//! ant-node release", not "this archive is release X.Y.Z". An attacker with
33//! cache-dir write access who possesses any past validly-signed release can
34//! plant it under a newer version's cache key; the next node performing
35//! that upgrade accepts it and runs it as the newer version. Net effect:
36//! forced downgrade or wrong-arch crash loop, not arbitrary RCE.
37//!
38//! This is out of scope of the cache-poisoning RCE class this module
39//! addresses (which trusted SHA-256 alone on cache hits): the `cache_dir`
40//! is `0o700` (defence in depth, see `cache_dir.rs`) and the attacker
41//! already needs same-UID write to exploit this — they can replace the
42//! running binary directly. Closing the gap properly requires upstream
43//! release-signing changes (the signing context must include the version
44//! string, e.g. `b"ant-node-release-v1:1.2.3"`) and is tracked as a
45//! follow-up.
46
47use crate::error::{Error, Result};
48use crate::logging::{debug, warn};
49use crate::upgrade::signature;
50use fs2::FileExt;
51use saorsa_pqc::api::sig::MlDsaPublicKey;
52use serde::{Deserialize, Serialize};
53use sha2::{Digest, Sha256};
54use std::fs::{self, File, OpenOptions};
55use std::io::{self, Read, Write};
56use std::path::{Path, PathBuf};
57
/// Maximum size accepted for the `.meta.json` sidecar.
///
/// A well-formed `CachedArchiveMeta` serialises to roughly 120 bytes; the
/// 4 KiB cap is comfortably above any legitimate payload and tight enough
/// that an attacker who plants an oversized metadata file cannot stall the
/// metadata read into a hang or OOM.
///
/// Passed as the size limit to `open_regular_capped` when
/// `BinaryCache::get_verified_archive` opens the sidecar.
const MAX_META_BYTES: u64 = 4 * 1024;
65
/// On-disk cache for downloaded, signature-verified upgrade archives.
///
/// For each cached version the directory holds three files:
/// `ant-node-{version}.archive`, `ant-node-{version}.sig` and
/// `ant-node-{version}.meta.json`. A single `download.lock` file in the
/// same directory serialises downloads.
#[derive(Clone)]
pub struct BinaryCache {
    /// Directory that holds cached archives, signatures, and metadata.
    cache_dir: PathBuf,
    /// Verification key override. `None` in production → the pinned release
    /// key embedded in [`signature`] is used (the real, unforgeable gate).
    /// Only ever `Some` via the `#[cfg(test)]` constructor, so test builds
    /// can exercise the cache with a generated keypair without weakening the
    /// production trust anchor in any way.
    verify_key: Option<MlDsaPublicKey>,
}
78
/// Metadata written alongside each cached archive.
///
/// Serialised as JSON into `ant-node-{version}.meta.json` by
/// `BinaryCache::store_archive`.
///
/// The SHA-256 here is a fast integrity/corruption pre-check only. It is
/// **not** a security control: the ML-DSA-65 signature over the archive is
/// re-verified on every cache hit regardless of this value.
#[derive(Serialize, Deserialize)]
struct CachedArchiveMeta {
    /// Semantic version string (e.g. "1.2.3"). On a cache hit this must
    /// equal the requested version, otherwise the entry is ignored.
    version: String,
    /// Hex-encoded SHA-256 digest of the cached archive (corruption check).
    archive_sha256: String,
    /// When the archive was cached (seconds since UNIX epoch). Recorded on
    /// store; the cache-hit path in this file does not consult it.
    cached_at_epoch_secs: u64,
}
93
94impl BinaryCache {
95    /// Create a new binary cache backed by the given directory.
96    ///
97    /// Production constructor: the cache verifies cached archives against the
98    /// pinned release public key embedded in the binary.
99    #[must_use]
100    pub fn new(cache_dir: PathBuf) -> Self {
101        Self {
102            cache_dir,
103            verify_key: None,
104        }
105    }
106
107    /// Test-only constructor that verifies against an explicit public key
108    /// instead of the pinned release key (the production trust anchor is
109    /// unchanged; this only exists so unit tests can produce verifiable
110    /// signatures with a generated keypair).
111    #[cfg(test)]
112    #[must_use]
113    pub fn new_with_verify_key(cache_dir: PathBuf, verify_key: MlDsaPublicKey) -> Self {
114        Self {
115            cache_dir,
116            verify_key: Some(verify_key),
117        }
118    }
119
120    /// Path of the cached archive for `version`.
121    #[must_use]
122    pub fn cached_archive_path(&self, version: &str) -> PathBuf {
123        self.cache_dir.join(format!("ant-node-{version}.archive"))
124    }
125
126    /// Path of the cached detached signature for `version`.
127    #[must_use]
128    fn cached_signature_path(&self, version: &str) -> PathBuf {
129        self.cache_dir.join(format!("ant-node-{version}.sig"))
130    }
131
132    /// Verify `archive` against `sig` using the pinned release key in
133    /// production, or the injected test key under `#[cfg(test)]`.
134    fn verify_archive(&self, archive: &Path, sig: &Path) -> Result<()> {
135        self.verify_key.as_ref().map_or_else(
136            || signature::verify_from_file(archive, sig),
137            |key| signature::verify_from_file_with_key(archive, sig, key),
138        )
139    }
140
141    /// Copy the cached archive into the caller-private `private_dir`,
142    /// **cryptographically re-verify that private copy**, and return its
143    /// path — or `None` if there is no usable, trusted cache entry.
144    ///
145    /// On every call this:
146    /// 1. loads the sibling metadata and checks the version matches,
147    /// 2. copies the cached archive + signature into `private_dir` (a
148    ///    location only this process writes, e.g. the per-upgrade temp dir),
149    /// 3. SHA-256 pre-checks the private copy against the metadata (fast
150    ///    corruption check), then
151    /// 4. **re-verifies the ML-DSA-65 signature over the private copy** with
152    ///    the pinned release key — the actual security gate.
153    ///
154    /// Verifying the *private copy* (not the shared cache file) closes the
155    /// TOCTOU window: an attacker with write access to the shared cache dir
156    /// cannot swap the bytes between verification and extraction, because the
157    /// caller extracts from the returned private path, which is the exact
158    /// byte sequence that was verified and is unreachable to the attacker.
159    ///
160    /// Any failure (missing/corrupt metadata, copy error, hash mismatch,
161    /// missing signature, or — critically — a signature that does not verify
162    /// against the pinned release key) returns `None`, forcing a fresh,
163    /// fully verified download.
164    ///
165    /// The caller MUST extract the binary from the returned (private) archive
166    /// path, so the executed bytes always derive from signature-verified
167    /// input that no other principal could have modified post-verification.
168    ///
169    /// `private_dir` is a load-bearing security invariant: it MUST be a
170    /// process-private, mode-`0o700` directory that no other principal
171    /// can write to. The caller in `apply.rs` creates it via
172    /// `tempfile::Builder::permissions(0o700).tempdir_in(binary_dir)` —
173    /// any future caller MUST uphold the same invariant, otherwise the
174    /// reopens by path in `sha256_file` and `verify_archive` would re-
175    /// introduce a TOCTOU window.
176    // The verifier-side cache-hit gate is read top-to-bottom by anyone
177    // auditing the security model. Splitting it into smaller helpers just
178    // to placate clippy's line limit would scatter the threat model across
179    // call sites without improving safety.
180    #[allow(clippy::too_many_lines)]
181    #[must_use]
182    pub fn get_verified_archive(&self, version: &str, private_dir: &Path) -> Option<PathBuf> {
183        let cached_archive = self.cached_archive_path(version);
184        let cached_sig = self.cached_signature_path(version);
185        let meta_path = self.meta_path(version);
186
187        // Read the metadata sidecar with a small, opened-handle size cap so
188        // an attacker with cache-dir write cannot plant `meta.json` as a
189        // symlink to `/dev/zero` (or any large/special file) and force a
190        // hang/OOM here before the archive/sig hardening runs.
191        let meta_data = {
192            let (mut meta_file, meta_len) = match open_regular_capped(&meta_path, MAX_META_BYTES) {
193                Ok(pair) => pair,
194                Err(e) => {
195                    debug!("Rejecting cache metadata for {version}: {e}");
196                    return None;
197                }
198            };
199            // `meta_len` is capped at MAX_META_BYTES (4 KiB), so this
200            // truncation can never happen in practice; saturating_cast
201            // makes that explicit for clippy on 32-bit targets.
202            let cap = usize::try_from(meta_len).unwrap_or(usize::MAX);
203            let mut buf = String::with_capacity(cap);
204            if let Err(e) = meta_file.read_to_string(&mut buf) {
205                debug!("Failed to read cache metadata for {version}: {e}");
206                return None;
207            }
208            buf
209        };
210        let meta: CachedArchiveMeta = serde_json::from_str(&meta_data).ok()?;
211
212        if meta.version != version {
213            debug!("Binary cache version mismatch in metadata");
214            return None;
215        }
216
217        // Open archive + signature ONCE each with size and file-type
218        // validation on the opened handles. Subsequent reads / hash /
219        // signature verification all go through the FDs opened here — there
220        // is no second path-based stat or open after this point, so an
221        // attacker who races a swap on the cache-dir paths (symlink, FIFO,
222        // device, oversized file) after these validations cannot redirect
223        // what gets staged into the private dir.
224        //
225        // Memory pressure note: `signature::verify_from_file*` reads the
226        // archive into memory in full (it is the FIPS-204 verifier's
227        // contract — message must be provided as a slice). `sha256_file`
228        // streams in 8 KiB chunks and is not an OOM vector. The
229        // `MAX_ARCHIVE_SIZE_BYTES` cap bounds the in-memory load and the
230        // staging-dir disk footprint together.
231        let (mut archive_file, archive_len) = match open_regular_capped(
232            &cached_archive,
233            crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64,
234        ) {
235            Ok(pair) => pair,
236            Err(e) => {
237                warn!("Rejecting cached archive for {version}: {e}");
238                return None;
239            }
240        };
241        let (mut sig_file, sig_len) =
242            match open_regular_capped(&cached_sig, signature::SIGNATURE_SIZE as u64) {
243                Ok(pair) => pair,
244                Err(e) => {
245                    warn!("Rejecting cached signature for {version}: {e}");
246                    return None;
247                }
248            };
249        if sig_len != signature::SIGNATURE_SIZE as u64 {
250            // open_regular_capped enforces ≤ max; we additionally require
251            // EXACTLY SIGNATURE_SIZE (a shorter sig is not valid ML-DSA-65).
252            warn!(
253                "Cached signature for {version} has wrong size ({sig_len} bytes, \
254                 expected {})",
255                signature::SIGNATURE_SIZE
256            );
257            return None;
258        }
259
260        // Stream the validated archive + signature into the caller-private
261        // directory FROM THE ALREADY-OPEN HANDLES (not from the path), so
262        // the bytes the verifier reads are the exact bytes the open-handle
263        // metadata checks were performed against. `take()` is belt-and-
264        // braces against an attacker who extends the file after open.
265        let private_archive = private_dir.join(format!("cached-{version}.archive"));
266        let private_sig = private_dir.join(format!("cached-{version}.sig"));
267
268        let cleanup = |reason: &str| {
269            debug!("Cleaning staged cache copy for {version}: {reason}");
270            let _ = fs::remove_file(&private_archive);
271            let _ = fs::remove_file(&private_sig);
272        };
273
274        if let Err(e) = (|| -> io::Result<()> {
275            let mut dest = File::create(&private_archive)?;
276            io::copy(&mut (&mut archive_file).take(archive_len), &mut dest)?;
277            Ok(())
278        })() {
279            debug!("Could not stage cached archive for {version}: {e}");
280            cleanup("archive copy failed");
281            return None;
282        }
283        if let Err(e) = (|| -> io::Result<()> {
284            let mut dest = File::create(&private_sig)?;
285            io::copy(&mut (&mut sig_file).take(sig_len), &mut dest)?;
286            Ok(())
287        })() {
288            debug!("Could not stage cached signature for {version}: {e}");
289            cleanup("signature copy failed");
290            return None;
291        }
292
293        // Fast corruption pre-check on the PRIVATE copy (NOT the security
294        // decision). A copy error or truncation surfaces here.
295        let actual_hash = match sha256_file(&private_archive) {
296            Ok(h) => h,
297            Err(e) => {
298                cleanup(&format!("sha256 read failed: {e}"));
299                return None;
300            }
301        };
302        if actual_hash != meta.archive_sha256 {
303            warn!(
304                "Binary cache SHA-256 mismatch for version {version} \
305                 (expected {}, got {actual_hash}) — ignoring cache entry",
306                meta.archive_sha256
307            );
308            cleanup("sha256 mismatch");
309            return None;
310        }
311
312        // THE SECURITY GATE: re-verify the ML-DSA-65 signature over the
313        // PRIVATE archive copy on every hit. The returned path is this same
314        // private copy, so the caller extracts exactly the bytes that were
315        // verified — a cache entry tampered with on disk (binary/archive
316        // swap, forged metadata, or a post-verify swap attempt) cannot
317        // produce a private copy whose signature verifies against the
318        // pinned release key.
319        if let Err(e) = self.verify_archive(&private_archive, &private_sig) {
320            warn!(
321                "Cached archive for version {version} FAILED ML-DSA signature \
322                 re-verification ({e}); discarding cache entry (possible \
323                 on-disk tampering). A fresh verified download will run."
324            );
325            cleanup("signature re-verification failed");
326            return None;
327        }
328
329        debug!("Cached archive for version {version} passed ML-DSA re-verification");
330        Some(private_archive)
331    }
332
333    /// Store a signature-verified archive in the cache.
334    ///
335    /// Both files are persisted (via write-to-temp-then-rename so readers
336    /// never observe partial writes); the metadata file is written last so
337    /// [`get_verified_archive`](Self::get_verified_archive) only succeeds
338    /// once every file is complete.
339    ///
340    /// Defence in depth: this re-verifies the archive against its signature
341    /// before caching, so a poisoned entry cannot be created through the
342    /// supported path even if a caller forgot to verify first.
343    ///
344    /// # Errors
345    ///
346    /// Returns an error if the signature does not verify, the inputs cannot
347    /// be read, or the cache files cannot be written.
348    pub fn store_archive(
349        &self,
350        version: &str,
351        archive_path: &Path,
352        signature_path: &Path,
353    ) -> Result<()> {
354        // Defence in depth: refuse to persist a non-regular file, an
355        // oversize archive, or a misshapen signature — mirroring the
356        // `get_verified_archive` cache-hit policy. `symlink_metadata`
357        // refuses to chase a symlink the caller may have planted.
358        //
359        // Note the intentional asymmetry with `open_regular_capped`
360        // (which uses `fs::metadata` and DOES follow symlinks): on the
361        // store path the source file is supplied by the caller (typically
362        // a path under our control after download), so a symlink there is
363        // surprising and worth rejecting. On the read path the cache dir
364        // is shared and an attacker may have planted a symlink — but the
365        // attacker already has write access, so chasing a symlink-to-
366        // regular is no worse than them editing the regular file
367        // directly, while still letting the post-open `is_file()` reject
368        // symlink-to-special.
369        let archive_meta = fs::symlink_metadata(archive_path)?;
370        if !archive_meta.file_type().is_file() {
371            return Err(Error::Upgrade(format!(
372                "Refusing to cache archive for {version}: source is not a \
373                 regular file (symlink/special)"
374            )));
375        }
376        let archive_len = archive_meta.len();
377        if archive_len > crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 {
378            return Err(Error::Upgrade(format!(
379                "Refusing to cache archive for {version}: size {archive_len} bytes \
380                 exceeds MAX_ARCHIVE_SIZE_BYTES"
381            )));
382        }
383        let sig_meta = fs::symlink_metadata(signature_path)?;
384        if !sig_meta.file_type().is_file() {
385            return Err(Error::Upgrade(format!(
386                "Refusing to cache archive for {version}: signature is not a \
387                 regular file (symlink/special)"
388            )));
389        }
390        let sig_len = sig_meta.len();
391        if sig_len != signature::SIGNATURE_SIZE as u64 {
392            return Err(Error::Upgrade(format!(
393                "Refusing to cache archive for {version}: signature size {sig_len} \
394                 bytes, expected {}",
395                signature::SIGNATURE_SIZE
396            )));
397        }
398
399        self.verify_archive(archive_path, signature_path)
400            .map_err(|e| {
401                Error::Upgrade(format!(
402                    "Refusing to cache archive for {version}: signature does not verify ({e})"
403                ))
404            })?;
405
406        let archive_hash = sha256_file(archive_path)?;
407
408        let dest_archive = self.cached_archive_path(version);
409        let dest_sig = self.cached_signature_path(version);
410        let meta_path = self.meta_path(version);
411
412        Self::atomic_copy(
413            archive_path,
414            &dest_archive,
415            &self
416                .cache_dir
417                .join(format!(".ant-node-{version}.archive.tmp")),
418        )?;
419        Self::atomic_copy(
420            signature_path,
421            &dest_sig,
422            &self.cache_dir.join(format!(".ant-node-{version}.sig.tmp")),
423        )?;
424
425        let now = std::time::SystemTime::now()
426            .duration_since(std::time::UNIX_EPOCH)
427            .map_err(|e| Error::Upgrade(format!("System clock error: {e}")))?
428            .as_secs();
429
430        let meta = CachedArchiveMeta {
431            version: version.to_string(),
432            archive_sha256: archive_hash,
433            cached_at_epoch_secs: now,
434        };
435
436        let meta_json = serde_json::to_string(&meta).map_err(|e| {
437            Error::Upgrade(format!("Failed to serialize cached archive metadata: {e}"))
438        })?;
439
440        // Metadata written last so a reader never sees a complete meta file
441        // pointing at an incomplete archive/signature pair.
442        let tmp_meta = self.cache_dir.join(format!(".ant-node-{version}.meta.tmp"));
443        let mut f = File::create(&tmp_meta)?;
444        f.write_all(meta_json.as_bytes())?;
445        f.sync_all()?;
446        drop(f);
447        let _ = fs::remove_file(&meta_path);
448        fs::rename(&tmp_meta, &meta_path)?;
449
450        debug!(
451            "Cached verified archive for version {version} at {}",
452            dest_archive.display()
453        );
454        Ok(())
455    }
456
457    /// Acquire an exclusive download lock and return the guard.
458    ///
459    /// This prevents multiple nodes from downloading the same archive
460    /// concurrently — the first acquires the lock and downloads, the rest
461    /// wait and then find the archive already cached.
462    ///
463    /// The lock is released when the returned guard is dropped.
464    ///
465    /// **Note:** `lock_exclusive()` blocks the calling thread. Callers in
466    /// async contexts should wrap this call in `tokio::task::spawn_blocking`.
467    ///
468    /// # Errors
469    ///
470    /// Returns an error if the lock file cannot be created or acquired.
471    pub fn acquire_download_lock(&self) -> Result<DownloadLockGuard> {
472        let lock_path = self.cache_dir.join("download.lock");
473        let lock = File::create(&lock_path)
474            .map_err(|e| Error::Upgrade(format!("Failed to create download lock: {e}")))?;
475        lock.lock_exclusive()
476            .map_err(|e| Error::Upgrade(format!("Failed to acquire download lock: {e}")))?;
477        Ok(DownloadLockGuard { _file: lock })
478    }
479
480    // -- private helpers -----------------------------------------------------
481
482    /// Copy `src` to `dest` atomically via a temp file + rename.
483    fn atomic_copy(src: &Path, dest: &Path, tmp: &Path) -> Result<()> {
484        fs::copy(src, tmp)?;
485        // Remove dest first on Windows where rename fails if it exists.
486        let _ = fs::remove_file(dest);
487        fs::rename(tmp, dest)?;
488        Ok(())
489    }
490
491    fn meta_path(&self, version: &str) -> PathBuf {
492        self.cache_dir.join(format!("ant-node-{version}.meta.json"))
493    }
494}
495
/// RAII guard that holds an exclusive download lock.
///
/// Returned by `BinaryCache::acquire_download_lock`. The underlying file
/// lock is released when this guard is dropped.
pub struct DownloadLockGuard {
    /// Open handle to the lock file on which the exclusive lock was taken.
    /// Never read; it exists only to keep the handle alive for the guard's
    /// lifetime.
    _file: File,
}
502
503/// Open `path` as a regular file with size at most `max_len`, validating
504/// the metadata on the **opened handle** so a race between any prior stat
505/// and the read cannot substitute a special file (FIFO/device/socket) or
506/// an oversized payload. A symlink whose target is a regular file is
507/// accepted (it's just an indirect path to a regular file — the attacker
508/// who placed the link already needed write access to the cache dir, the
509/// same access level as directly editing the regular file); a symlink
510/// whose target is a special file is rejected by the `is_file()` check on
511/// the opened handle.
512///
513/// On Unix, `open()` of a FIFO/named-pipe for reading blocks until a
514/// writer connects, so a cache-dir attacker could otherwise hang the
515/// upgrade indefinitely by planting a FIFO at the cache entry's path. We
516/// (a) reject non-regular files via a `fs::metadata()` pre-check (follows
517/// symlinks, so a symlink-to-regular is still accepted), and (b) on Unix
518/// also open with `O_NONBLOCK` as a belt-and-braces defence in case the
519/// pre-check races a swap. The post-open `is_file()` on the opened handle
520/// remains the TOCTOU-safe gate.
521///
522/// Returns `(File, len)` on success; the returned `File` is positioned at
523/// offset 0 and may be `io::copy`'d into a destination — callers should
524/// wrap with `Read::take(max_len)` so an attacker who extends the file
525/// after the metadata read cannot stream beyond the cap.
526fn open_regular_capped(path: &Path, max_len: u64) -> io::Result<(File, u64)> {
527    // Pre-check: refuse to even open a non-regular file. This is the
528    // first line of defence against an attacker who planted a FIFO at
529    // `path` — opening a FIFO for reading on Unix blocks until a writer
530    // connects, hanging the upgrade indefinitely. `fs::metadata` follows
531    // symlinks, so a symlink whose target is a regular file is accepted
532    // here and a symlink whose target is a FIFO/device/socket is rejected.
533    let pre_meta = fs::metadata(path)?;
534    if !pre_meta.file_type().is_file() {
535        return Err(io::Error::new(
536            io::ErrorKind::InvalidInput,
537            "not a regular file (FIFO/device/socket/dir)",
538        ));
539    }
540
541    // Belt-and-braces against a pre-check vs open() race: on Unix also
542    // open with O_NONBLOCK, so even if an attacker swaps the regular file
543    // for a FIFO between the metadata read and open(), the open() returns
544    // immediately instead of blocking on a writer. Reads on a regular file
545    // ignore O_NONBLOCK, so this is a no-op for the happy path. The
546    // post-open is_file() check below still catches the swap.
547    let file = {
548        let mut opts = OpenOptions::new();
549        opts.read(true);
550        #[cfg(unix)]
551        {
552            use std::os::unix::fs::OpenOptionsExt;
553            // `O_NONBLOCK` is per-arch on Linux (0o4000 on x86/arm/aarch64
554            // /riscv, 0o200 on mips, 0x4000 on sparc, etc.). Use `libc`
555            // so we always pick the right constant for the target arch
556            // instead of silently setting a different flag. Reads on a
557            // regular file ignore `O_NONBLOCK` on all our supported
558            // platforms, so this is a no-op for the happy path.
559            opts.custom_flags(libc::O_NONBLOCK);
560        }
561        opts.open(path)?
562    };
563    let meta = file.metadata()?;
564    if !meta.file_type().is_file() {
565        return Err(io::Error::new(
566            io::ErrorKind::InvalidInput,
567            "not a regular file (FIFO/device/socket/dir)",
568        ));
569    }
570    let len = meta.len();
571    if len > max_len {
572        return Err(io::Error::new(
573            io::ErrorKind::InvalidInput,
574            format!("file exceeds size cap ({len} > {max_len})"),
575        ));
576    }
577    Ok((file, len))
578}
579
580/// Compute the hex-encoded SHA-256 digest of a file.
581fn sha256_file(path: &Path) -> Result<String> {
582    let mut file = File::open(path)?;
583    let mut hasher = Sha256::new();
584    let mut buf = [0u8; 8192];
585    loop {
586        let n = file
587            .read(&mut buf)
588            .map_err(|e| Error::Upgrade(format!("Failed to read file for hashing: {e}")))?;
589        if n == 0 {
590            break;
591        }
592        hasher.update(&buf[..n]);
593    }
594    Ok(hex::encode(hasher.finalize()))
595}
596
597// ---------------------------------------------------------------------------
598// Tests
599// ---------------------------------------------------------------------------
600
601#[cfg(test)]
602#[allow(clippy::unwrap_used, clippy::expect_used)]
603mod tests {
604    use super::*;
605    use saorsa_pqc::api::sig::{ml_dsa_65, MlDsaPublicKey, MlDsaSecretKey};
606    use std::sync::OnceLock;
607    use tempfile::TempDir;
608
609    /// One generated keypair for the whole test module (keygen is expensive).
610    fn test_keypair() -> &'static (MlDsaPublicKey, MlDsaSecretKey) {
611        static KP: OnceLock<(MlDsaPublicKey, MlDsaSecretKey)> = OnceLock::new();
612        KP.get_or_init(|| ml_dsa_65().generate_keypair().unwrap())
613    }
614
615    fn cache_with_test_key(dir: &Path) -> BinaryCache {
616        BinaryCache::new_with_verify_key(dir.to_path_buf(), test_keypair().0.clone())
617    }
618
619    /// A caller-private staging directory (the per-upgrade temp dir in
620    /// production). Returned so it outlives the call.
621    fn priv_dir() -> TempDir {
622        TempDir::new().unwrap()
623    }
624
625    /// Write an archive + a valid detached signature over it.
626    fn make_signed_archive(dir: &Path, contents: &[u8]) -> (PathBuf, PathBuf) {
627        let archive = dir.join("src-archive");
628        fs::write(&archive, contents).unwrap();
629        let sig = ml_dsa_65()
630            .sign_with_context(&test_keypair().1, contents, signature::SIGNING_CONTEXT)
631            .unwrap();
632        let sig_path = dir.join("src-archive.sig");
633        fs::write(&sig_path, sig.to_bytes()).unwrap();
634        (archive, sig_path)
635    }
636
637    #[test]
638    fn test_miss_returns_none() {
639        let tmp = TempDir::new().unwrap();
640        let cache = cache_with_test_key(tmp.path());
641        let pd = priv_dir();
642        assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none());
643    }
644
645    #[test]
646    fn test_store_and_get_verified_archive() {
647        let tmp = TempDir::new().unwrap();
648        let cache = cache_with_test_key(tmp.path());
649        let pd = priv_dir();
650
651        let (archive, sig) = make_signed_archive(tmp.path(), b"signed archive bytes");
652        cache.store_archive("1.2.3", &archive, &sig).unwrap();
653
654        let got = cache
655            .get_verified_archive("1.2.3", pd.path())
656            .expect("cache hit");
657        assert_eq!(fs::read(&got).unwrap(), b"signed archive bytes");
658        // The returned path must be the PRIVATE copy, not the shared cache
659        // file (that is what closes the verify/extract TOCTOU).
660        assert!(
661            got.starts_with(pd.path()),
662            "returned archive must be the caller-private copy, got {got:?}"
663        );
664        assert_ne!(got, cache.cached_archive_path("1.2.3"));
665    }
666
667    #[test]
668    fn test_store_rejects_unsigned_archive() {
669        let tmp = TempDir::new().unwrap();
670        let cache = cache_with_test_key(tmp.path());
671        let pd = priv_dir();
672
673        let archive = tmp.path().join("a");
674        fs::write(&archive, b"unsigned").unwrap();
675        let bad_sig = tmp.path().join("a.sig");
676        fs::write(&bad_sig, vec![0u8; signature::SIGNATURE_SIZE]).unwrap();
677
678        assert!(cache.store_archive("1.0.0", &archive, &bad_sig).is_err());
679        assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none());
680    }
681
/// Swapping the cached archive on disk — even while also forging a
/// matching SHA-256 in the metadata — must not get the entry trusted:
/// the ML-DSA signature is re-verified on every hit, so the lookup after
/// tampering has to come back empty.
#[test]
fn test_tampered_cached_archive_is_rejected() {
    // Bytes the simulated attacker plants in place of the real archive.
    const PLANTED: &[u8] = b"malicious payload";

    let cache_root = TempDir::new().unwrap();
    let cache = cache_with_test_key(cache_root.path());
    let private = priv_dir();

    let (archive_path, sig_path) =
        make_signed_archive(cache_root.path(), b"legit release archive");
    cache
        .store_archive("2.0.0", &archive_path, &sig_path)
        .unwrap();

    // Sanity: the untouched entry is served.
    let clean_hit = cache.get_verified_archive("2.0.0", private.path());
    assert!(clean_hit.is_some());

    // Step 1 of the attack: overwrite the shared cached archive.
    let shared_archive = cache.cached_archive_path("2.0.0");
    fs::write(&shared_archive, PLANTED).unwrap();

    // Step 2: rewrite the metadata so its SHA-256 matches the planted
    // bytes, letting the corruption pre-check pass.
    let mut hasher = Sha256::new();
    hasher.update(PLANTED);
    let forged_meta = CachedArchiveMeta {
        version: String::from("2.0.0"),
        archive_sha256: hex::encode(hasher.finalize()),
        cached_at_epoch_secs: 0,
    };
    let forged_json = serde_json::to_string(&forged_meta).unwrap();
    fs::write(cache.meta_path("2.0.0"), forged_json).unwrap();

    // The hash now agrees with the swapped archive, but the signature no
    // longer does → the entry must be rejected.
    let tampered_hit = cache.get_verified_archive("2.0.0", private.path());
    assert!(
        tampered_hit.is_none(),
        "tampered cache entry must NOT be trusted even with a forged \
         matching SHA-256 — the signature gate runs on every hit"
    );
}
724
/// TOCTOU defence: the path returned by a cache hit is a caller-private
/// copy, so overwriting the *shared* cache archive after the hit must not
/// change the bytes the caller subsequently reads from the returned path.
#[test]
fn test_returned_archive_is_private_copy_immune_to_post_hit_swap() {
    let cache_root = TempDir::new().unwrap();
    let cache = cache_with_test_key(cache_root.path());
    let private = priv_dir();

    let (archive_path, sig_path) =
        make_signed_archive(cache_root.path(), b"the real signed release");
    cache
        .store_archive("3.0.0", &archive_path, &sig_path)
        .unwrap();

    let verified = cache
        .get_verified_archive("3.0.0", private.path())
        .expect("cache hit");

    // Simulate the attacker replacing the SHARED cache file once the hit
    // has already been served.
    let shared_archive = cache.cached_archive_path("3.0.0");
    fs::write(shared_archive, b"post-verify malicious swap").unwrap();

    // Whatever the caller extracts from must still be the verified bytes.
    let bytes_seen_by_caller = fs::read(&verified).unwrap();
    assert_eq!(
        bytes_seen_by_caller,
        b"the real signed release",
        "extraction must read the verified private bytes, not the \
         attacker's post-verification swap"
    );
}
758
/// Removing the cached signature file after a successful store must turn
/// the entry into a miss rather than letting verification be skipped.
#[test]
fn test_missing_signature_returns_none() {
    let cache_root = TempDir::new().unwrap();
    let cache = cache_with_test_key(cache_root.path());
    let private = priv_dir();

    let (archive_path, sig_path) = make_signed_archive(cache_root.path(), b"data");
    cache
        .store_archive("1.0.0", &archive_path, &sig_path)
        .unwrap();

    // Simulated attacker: drop the signature in the hope the check is skipped.
    let cached_sig = cache.cached_signature_path("1.0.0");
    fs::remove_file(cached_sig).unwrap();

    let lookup = cache.get_verified_archive("1.0.0", private.path());
    assert!(lookup.is_none());
}
772
/// Removing the metadata file of a stored entry must turn it into a miss.
#[test]
fn test_missing_meta_returns_none() {
    let cache_root = TempDir::new().unwrap();
    let cache = cache_with_test_key(cache_root.path());
    let private = priv_dir();

    let (archive_path, sig_path) = make_signed_archive(cache_root.path(), b"data");
    cache
        .store_archive("1.0.0", &archive_path, &sig_path)
        .unwrap();

    let meta_file = cache.meta_path("1.0.0");
    fs::remove_file(meta_file).unwrap();

    let lookup = cache.get_verified_archive("1.0.0", private.path());
    assert!(lookup.is_none());
}
783
/// Size policy: a cached archive that has been grown to one byte past
/// `MAX_ARCHIVE_SIZE_BYTES` must be rejected by `get_verified_archive`,
/// and no copy of it may appear in the caller's private directory — so a
/// cache-dir writer cannot OOM/disk-exhaust the verifier with a huge file.
#[test]
fn test_oversize_cached_archive_is_rejected_before_copy() {
    let cache_root = TempDir::new().unwrap();
    let cache = cache_with_test_key(cache_root.path());
    let private = priv_dir();

    // Start from a genuine signed entry so the metadata and signature are
    // well-formed…
    let (archive_path, sig_path) = make_signed_archive(cache_root.path(), b"legit");
    cache
        .store_archive("3.1.0", &archive_path, &sig_path)
        .unwrap();

    // …then recreate the cached archive as a file one byte over the limit.
    // The `File` temporary is closed at the end of the statement.
    let shared_archive = cache.cached_archive_path("3.1.0");
    let one_past_limit = crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1;
    File::create(&shared_archive)
        .unwrap()
        .set_len(one_past_limit)
        .unwrap();

    // Rejected, and nothing staged into the private directory.
    let lookup = cache.get_verified_archive("3.1.0", private.path());
    assert!(lookup.is_none());
    let staged_copy = private.path().join("cached-3.1.0.archive");
    assert!(
        !staged_copy.exists(),
        "oversize entry must NOT be staged into private dir"
    );
}
813
/// A cached signature of the wrong length must cause the cache hit to be
/// rejected, and — as the test name promises, and as the sibling
/// oversize/symlink archive tests also check — no archive may be left
/// staged in the caller's private directory.
#[test]
fn test_wrong_size_signature_is_rejected_before_copy() {
    let tmp = TempDir::new().unwrap();
    let cache = cache_with_test_key(tmp.path());
    let pd = priv_dir();

    let (archive, sig) = make_signed_archive(tmp.path(), b"legit");
    cache.store_archive("3.2.0", &archive, &sig).unwrap();
    // Replace the cached signature with the wrong size.
    fs::write(cache.cached_signature_path("3.2.0"), b"too-short").unwrap();

    assert!(cache.get_verified_archive("3.2.0", pd.path()).is_none());
    // "Before copy": the rejection must happen without staging the archive.
    assert!(
        !pd.path().join("cached-3.2.0.archive").exists(),
        "entry with a wrong-size signature must NOT be staged into private dir"
    );
}
827
/// `store_archive` itself must refuse an archive larger than
/// `MAX_ARCHIVE_SIZE_BYTES`, independent of any size cap a caller may or
/// may not have applied at download time.
#[test]
fn test_store_archive_rejects_oversize() {
    let cache_root = TempDir::new().unwrap();
    let cache = cache_with_test_key(cache_root.path());

    // A sparse file one byte past the limit stands in for the archive; the
    // `File` temporary is closed at the end of the statement.
    let big = cache_root.path().join("big.archive");
    let one_past_limit = crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1;
    File::create(&big).unwrap().set_len(one_past_limit).unwrap();

    // Any blob of signature length will do as the accompanying signature.
    let any_sig = cache_root.path().join("any.sig");
    let sig_bytes = vec![0u8; signature::SIGNATURE_SIZE];
    fs::write(&any_sig, sig_bytes).unwrap();

    let store_outcome = cache.store_archive("9.9.9", &big, &any_sig);
    assert!(store_outcome.is_err());
}
848
/// Round-3 regression: replacing the cached archive with a symlink to
/// `/dev/zero` (small `stat(2)` size, endless reads) must not get past
/// the size gate. The entry has to be rejected and nothing may be staged
/// in the caller's private directory.
#[cfg(unix)]
#[test]
fn test_symlink_cached_archive_is_rejected_before_copy() {
    let cache_root = TempDir::new().unwrap();
    let cache = cache_with_test_key(cache_root.path());
    let private = priv_dir();

    // Begin with a genuine signed entry so meta/version/sig-size are good…
    let (archive_path, sig_path) = make_signed_archive(cache_root.path(), b"legit");
    cache
        .store_archive("4.0.0", &archive_path, &sig_path)
        .unwrap();

    // …and swap the cached archive for a symlink pointing at /dev/zero.
    let shared_archive = cache.cached_archive_path("4.0.0");
    fs::remove_file(&shared_archive).unwrap();
    std::os::unix::fs::symlink("/dev/zero", &shared_archive).unwrap();

    let lookup = cache.get_verified_archive("4.0.0", private.path());
    assert!(
        lookup.is_none(),
        "a symlinked cached archive must be rejected pre-copy, \
         not chased into /dev/zero"
    );

    // No private copy may exist after the rejection.
    let staged_copy = private.path().join("cached-4.0.0.archive");
    assert!(!staged_copy.exists());
}
876
/// `.meta.json` is read through the same size/file-type gate as the
/// archive and signature: a metadata file larger than `MAX_META_BYTES`
/// must make the lookup come back empty instead of being parsed.
#[test]
fn test_oversized_meta_is_rejected() {
    let tmp = TempDir::new().unwrap();
    let cache = cache_with_test_key(tmp.path());
    let pd = priv_dir();

    // Establish a valid entry so archive/sig are well-formed.
    let (archive, sig) = make_signed_archive(tmp.path(), b"legit");
    cache.store_archive("5.0.0", &archive, &sig).unwrap();

    // Overwrite meta with a file well above MAX_META_BYTES of garbage.
    // Fail loudly if the limit cannot be represented as `usize`: falling
    // back to `usize::MAX` would only make the `+ 1024` below overflow
    // (panic in debug, wrap to a tiny size in release) and hide the cause.
    let meta_path = cache.meta_path("5.0.0");
    let limit = usize::try_from(MAX_META_BYTES).expect("MAX_META_BYTES must fit in usize");
    let huge = vec![b'a'; limit + 1024];
    fs::write(&meta_path, &huge).unwrap();

    assert!(
        cache.get_verified_archive("5.0.0", pd.path()).is_none(),
        "oversized metadata file must be rejected before parsing"
    );
}
901
/// Replacing the cached archive with a FIFO must not make
/// `get_verified_archive` block waiting for a writer. The lookup has to
/// return promptly with a miss (so the caller can fall back to a fresh
/// verified download) and must leave nothing staged in the private dir.
#[cfg(unix)]
#[test]
fn test_fifo_cached_archive_does_not_hang() {
    use std::time::{Duration, Instant};

    let cache_root = TempDir::new().unwrap();
    let cache = cache_with_test_key(cache_root.path());
    let private = priv_dir();

    // A genuine signed entry keeps meta/version/sig-size valid; only the
    // archive itself is then swapped for a FIFO. Opening a FIFO for
    // reading normally blocks until a writer connects, which is exactly
    // what the pre-check + O_NONBLOCK are meant to prevent.
    let (archive_path, sig_path) = make_signed_archive(cache_root.path(), b"legit");
    cache
        .store_archive("6.0.0", &archive_path, &sig_path)
        .unwrap();
    let fifo_path = cache.cached_archive_path("6.0.0");
    fs::remove_file(&fifo_path).unwrap();

    // Call libc::mkfifo directly instead of shelling out, so a CI image
    // without coreutils cannot silently skip this test and hide a
    // packaging regression.
    let fifo_cstr = std::ffi::CString::new(fifo_path.as_os_str().as_encoded_bytes()).unwrap();
    #[allow(unsafe_code)]
    // SAFETY: `fifo_cstr` is a NUL-terminated `CString` that outlives this
    // single FFI call; `mkfifo(2)` only reads the path and reports failure
    // through its return value (-1, errno set), which is checked below.
    let rc = unsafe { libc::mkfifo(fifo_cstr.as_ptr(), 0o600) };
    assert_eq!(rc, 0, "mkfifo failed: {}", std::io::Error::last_os_error());

    // Time only the cache-hit path so process start-up or unrelated test
    // parallelism cannot eat into the budget.
    let start = Instant::now();
    let got = cache.get_verified_archive("6.0.0", private.path());
    let elapsed = start.elapsed();

    assert!(
        got.is_none(),
        "a FIFO planted at the cached archive path must be rejected"
    );

    // A 5s budget leaves generous headroom on a contended CI macOS runner
    // yet still catches a genuinely blocking open of the FIFO.
    let budget = Duration::from_secs(5);
    assert!(
        elapsed < budget,
        "open of FIFO returned in {elapsed:?}, expected ≪ 5s — \
         pre-check or O_NONBLOCK is not catching this"
    );

    // No private copy may exist after the rejection.
    let staged_copy = private.path().join("cached-6.0.0.archive");
    assert!(!staged_copy.exists());
}
954
/// Replacing `.meta.json` with a symlink to a special file (here
/// `/dev/zero`) must make the lookup come back empty rather than hanging
/// or exhausting memory while reading the metadata.
#[cfg(unix)]
#[test]
fn test_meta_symlink_to_special_file_is_rejected() {
    let cache_root = TempDir::new().unwrap();
    let cache = cache_with_test_key(cache_root.path());
    let private = priv_dir();

    let (archive_path, sig_path) = make_signed_archive(cache_root.path(), b"legit");
    cache
        .store_archive("5.1.0", &archive_path, &sig_path)
        .unwrap();

    // Swap the real metadata file for a symlink pointing at /dev/zero.
    let meta_file = cache.meta_path("5.1.0");
    fs::remove_file(&meta_file).unwrap();
    std::os::unix::fs::symlink("/dev/zero", &meta_file).unwrap();

    let lookup = cache.get_verified_archive("5.1.0", private.path());
    assert!(
        lookup.is_none(),
        "metadata symlink to a special file must be rejected"
    );
}
977}