ant_node/upgrade/binary_cache.rs
1//! Disk cache for downloaded upgrade archives.
2//!
3//! When multiple ant-node instances detect the same upgrade, only the first
4//! one needs to download the archive. `BinaryCache` stores the **signed
5//! archive together with its detached ML-DSA-65 signature** so that
6//! subsequent nodes can reuse it.
7//!
8//! ## Security model
9//!
10//! The ML-DSA-65 signature is the security gate, and it covers the *archive*
11//! bytes — not the extracted binary. A previous version cached the extracted
12//! binary and, on a cache hit, returned it after only a SHA-256 check against
13//! a sibling metadata file. SHA-256 is not a security control: anyone able to
14//! write to the shared cache directory (a co-located process, a shared
15//! container volume, a low-privilege foothold) could replace the cached
16//! binary and its `.meta.json` with a matching hash, and the next node would
17//! execute it **without any signature verification** — persistent RCE.
18//!
19//! This module now caches the *archive + signature* and, on **every** cache
20//! hit, re-runs ML-DSA-65 verification over the cached archive before it is
21//! used. A tampered archive fails verification (the release key is pinned in
22//! the binary and cannot be forged); a tampered or missing signature fails
23//! likewise. The extracted binary is always derived fresh from the
24//! just-verified archive by the caller, so a poisoned cache entry can never
25//! be executed. The SHA-256 metadata is retained only as a fast corruption
26//! pre-check, never as the trust decision.
27//!
28//! ## Residual: cache entries are not bound to a specific release version
29//!
30//! `signature::SIGNING_CONTEXT = "ant-node-release-v1"` is constant across
31//! versions, so the ML-DSA signature attests to "this archive is a valid
32//! ant-node release", not "this archive is release X.Y.Z". An attacker with
33//! cache-dir write access who possesses any past validly-signed release can
34//! plant it under a newer version's cache key; the next node performing
35//! that upgrade accepts it and runs it as the newer version. Net effect:
36//! forced downgrade or wrong-arch crash loop, not arbitrary RCE.
37//!
38//! This is out of scope of the cache-poisoning RCE class this module
39//! addresses (which trusted SHA-256 alone on cache hits): the `cache_dir`
40//! is `0o700` (defence in depth, see `cache_dir.rs`) and the attacker
41//! already needs same-UID write to exploit this — they can replace the
42//! running binary directly. Closing the gap properly requires upstream
43//! release-signing changes (the signing context must include the version
44//! string, e.g. `b"ant-node-release-v1:1.2.3"`) and is tracked as a
45//! follow-up.
46
47use crate::error::{Error, Result};
48use crate::logging::{debug, warn};
49use crate::upgrade::signature;
50use fs2::FileExt;
51use saorsa_pqc::api::sig::MlDsaPublicKey;
52use serde::{Deserialize, Serialize};
53use sha2::{Digest, Sha256};
54use std::fs::{self, File, OpenOptions};
55use std::io::{self, Read, Write};
56use std::path::{Path, PathBuf};
57
/// Upper bound, in bytes, on the `.meta.json` sidecar that will be read.
///
/// A well-formed `CachedArchiveMeta` serialises to roughly 120 bytes, so
/// 4 KiB leaves generous headroom while still ensuring that a planted
/// sidecar which is huge — or which resolves to an endless source such as
/// `/dev/zero` — cannot stall the metadata read or exhaust memory.
const MAX_META_BYTES: u64 = 4096;
65
66/// On-disk cache for downloaded, signature-verified upgrade archives.
67#[derive(Clone)]
68pub struct BinaryCache {
69 /// Directory that holds cached archives, signatures, and metadata.
70 cache_dir: PathBuf,
71 /// Verification key override. `None` in production → the pinned release
72 /// key embedded in [`signature`] is used (the real, unforgeable gate).
73 /// Only ever `Some` via the `#[cfg(test)]` constructor, so test builds
74 /// can exercise the cache with a generated keypair without weakening the
75 /// production trust anchor in any way.
76 verify_key: Option<MlDsaPublicKey>,
77}
78
79/// Metadata written alongside each cached archive.
80///
81/// The SHA-256 here is a fast integrity/corruption pre-check only. It is
82/// **not** a security control: the ML-DSA-65 signature over the archive is
83/// re-verified on every cache hit regardless of this value.
84#[derive(Serialize, Deserialize)]
85struct CachedArchiveMeta {
86 /// Semantic version string (e.g. "1.2.3").
87 version: String,
88 /// Hex-encoded SHA-256 digest of the cached archive (corruption check).
89 archive_sha256: String,
90 /// When the archive was cached (seconds since UNIX epoch).
91 cached_at_epoch_secs: u64,
92}
93
94impl BinaryCache {
95 /// Create a new binary cache backed by the given directory.
96 ///
97 /// Production constructor: the cache verifies cached archives against the
98 /// pinned release public key embedded in the binary.
99 #[must_use]
100 pub fn new(cache_dir: PathBuf) -> Self {
101 Self {
102 cache_dir,
103 verify_key: None,
104 }
105 }
106
/// Build a cache that verifies against `verify_key` rather than the pinned
/// release key.
///
/// Compiled only under `#[cfg(test)]`, so the production trust anchor is
/// unaffected; it exists solely so unit tests can sign archives with a
/// generated keypair and still have them verify.
#[cfg(test)]
#[must_use]
pub fn new_with_verify_key(cache_dir: PathBuf, verify_key: MlDsaPublicKey) -> Self {
    let verify_key = Some(verify_key);
    Self { cache_dir, verify_key }
}
119
120 /// Path of the cached archive for `version`.
121 #[must_use]
122 pub fn cached_archive_path(&self, version: &str) -> PathBuf {
123 self.cache_dir.join(format!("ant-node-{version}.archive"))
124 }
125
126 /// Path of the cached detached signature for `version`.
127 #[must_use]
128 fn cached_signature_path(&self, version: &str) -> PathBuf {
129 self.cache_dir.join(format!("ant-node-{version}.sig"))
130 }
131
132 /// Verify `archive` against `sig` using the pinned release key in
133 /// production, or the injected test key under `#[cfg(test)]`.
134 fn verify_archive(&self, archive: &Path, sig: &Path) -> Result<()> {
135 self.verify_key.as_ref().map_or_else(
136 || signature::verify_from_file(archive, sig),
137 |key| signature::verify_from_file_with_key(archive, sig, key),
138 )
139 }
140
141 /// Copy the cached archive into the caller-private `private_dir`,
142 /// **cryptographically re-verify that private copy**, and return its
143 /// path — or `None` if there is no usable, trusted cache entry.
144 ///
145 /// On every call this:
146 /// 1. loads the sibling metadata and checks the version matches,
147 /// 2. copies the cached archive + signature into `private_dir` (a
148 /// location only this process writes, e.g. the per-upgrade temp dir),
149 /// 3. SHA-256 pre-checks the private copy against the metadata (fast
150 /// corruption check), then
151 /// 4. **re-verifies the ML-DSA-65 signature over the private copy** with
152 /// the pinned release key — the actual security gate.
153 ///
154 /// Verifying the *private copy* (not the shared cache file) closes the
155 /// TOCTOU window: an attacker with write access to the shared cache dir
156 /// cannot swap the bytes between verification and extraction, because the
157 /// caller extracts from the returned private path, which is the exact
158 /// byte sequence that was verified and is unreachable to the attacker.
159 ///
160 /// Any failure (missing/corrupt metadata, copy error, hash mismatch,
161 /// missing signature, or — critically — a signature that does not verify
162 /// against the pinned release key) returns `None`, forcing a fresh,
163 /// fully verified download.
164 ///
165 /// The caller MUST extract the binary from the returned (private) archive
166 /// path, so the executed bytes always derive from signature-verified
167 /// input that no other principal could have modified post-verification.
168 ///
169 /// `private_dir` is a load-bearing security invariant: it MUST be a
170 /// process-private, mode-`0o700` directory that no other principal
171 /// can write to. The caller in `apply.rs` creates it via
172 /// `tempfile::Builder::permissions(0o700).tempdir_in(binary_dir)` —
173 /// any future caller MUST uphold the same invariant, otherwise the
174 /// reopens by path in `sha256_file` and `verify_archive` would re-
175 /// introduce a TOCTOU window.
176 // The verifier-side cache-hit gate is read top-to-bottom by anyone
177 // auditing the security model. Splitting it into smaller helpers just
178 // to placate clippy's line limit would scatter the threat model across
179 // call sites without improving safety.
180 #[allow(clippy::too_many_lines)]
181 #[must_use]
182 pub fn get_verified_archive(&self, version: &str, private_dir: &Path) -> Option<PathBuf> {
183 let cached_archive = self.cached_archive_path(version);
184 let cached_sig = self.cached_signature_path(version);
185 let meta_path = self.meta_path(version);
186
187 // Read the metadata sidecar with a small, opened-handle size cap so
188 // an attacker with cache-dir write cannot plant `meta.json` as a
189 // symlink to `/dev/zero` (or any large/special file) and force a
190 // hang/OOM here before the archive/sig hardening runs.
191 let meta_data = {
192 let (mut meta_file, meta_len) = match open_regular_capped(&meta_path, MAX_META_BYTES) {
193 Ok(pair) => pair,
194 Err(e) => {
195 debug!("Rejecting cache metadata for {version}: {e}");
196 return None;
197 }
198 };
199 // `meta_len` is capped at MAX_META_BYTES (4 KiB), so this
200 // truncation can never happen in practice; saturating_cast
201 // makes that explicit for clippy on 32-bit targets.
202 let cap = usize::try_from(meta_len).unwrap_or(usize::MAX);
203 let mut buf = String::with_capacity(cap);
204 if let Err(e) = meta_file.read_to_string(&mut buf) {
205 debug!("Failed to read cache metadata for {version}: {e}");
206 return None;
207 }
208 buf
209 };
210 let meta: CachedArchiveMeta = serde_json::from_str(&meta_data).ok()?;
211
212 if meta.version != version {
213 debug!("Binary cache version mismatch in metadata");
214 return None;
215 }
216
217 // Open archive + signature ONCE each with size and file-type
218 // validation on the opened handles. Subsequent reads / hash /
219 // signature verification all go through the FDs opened here — there
220 // is no second path-based stat or open after this point, so an
221 // attacker who races a swap on the cache-dir paths (symlink, FIFO,
222 // device, oversized file) after these validations cannot redirect
223 // what gets staged into the private dir.
224 //
225 // Memory pressure note: `signature::verify_from_file*` reads the
226 // archive into memory in full (it is the FIPS-204 verifier's
227 // contract — message must be provided as a slice). `sha256_file`
228 // streams in 8 KiB chunks and is not an OOM vector. The
229 // `MAX_ARCHIVE_SIZE_BYTES` cap bounds the in-memory load and the
230 // staging-dir disk footprint together.
231 let (mut archive_file, archive_len) = match open_regular_capped(
232 &cached_archive,
233 crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64,
234 ) {
235 Ok(pair) => pair,
236 Err(e) => {
237 warn!("Rejecting cached archive for {version}: {e}");
238 return None;
239 }
240 };
241 let (mut sig_file, sig_len) =
242 match open_regular_capped(&cached_sig, signature::SIGNATURE_SIZE as u64) {
243 Ok(pair) => pair,
244 Err(e) => {
245 warn!("Rejecting cached signature for {version}: {e}");
246 return None;
247 }
248 };
249 if sig_len != signature::SIGNATURE_SIZE as u64 {
250 // open_regular_capped enforces ≤ max; we additionally require
251 // EXACTLY SIGNATURE_SIZE (a shorter sig is not valid ML-DSA-65).
252 warn!(
253 "Cached signature for {version} has wrong size ({sig_len} bytes, \
254 expected {})",
255 signature::SIGNATURE_SIZE
256 );
257 return None;
258 }
259
260 // Stream the validated archive + signature into the caller-private
261 // directory FROM THE ALREADY-OPEN HANDLES (not from the path), so
262 // the bytes the verifier reads are the exact bytes the open-handle
263 // metadata checks were performed against. `take()` is belt-and-
264 // braces against an attacker who extends the file after open.
265 let private_archive = private_dir.join(format!("cached-{version}.archive"));
266 let private_sig = private_dir.join(format!("cached-{version}.sig"));
267
268 let cleanup = |reason: &str| {
269 debug!("Cleaning staged cache copy for {version}: {reason}");
270 let _ = fs::remove_file(&private_archive);
271 let _ = fs::remove_file(&private_sig);
272 };
273
274 if let Err(e) = (|| -> io::Result<()> {
275 let mut dest = File::create(&private_archive)?;
276 io::copy(&mut (&mut archive_file).take(archive_len), &mut dest)?;
277 Ok(())
278 })() {
279 debug!("Could not stage cached archive for {version}: {e}");
280 cleanup("archive copy failed");
281 return None;
282 }
283 if let Err(e) = (|| -> io::Result<()> {
284 let mut dest = File::create(&private_sig)?;
285 io::copy(&mut (&mut sig_file).take(sig_len), &mut dest)?;
286 Ok(())
287 })() {
288 debug!("Could not stage cached signature for {version}: {e}");
289 cleanup("signature copy failed");
290 return None;
291 }
292
293 // Fast corruption pre-check on the PRIVATE copy (NOT the security
294 // decision). A copy error or truncation surfaces here.
295 let actual_hash = match sha256_file(&private_archive) {
296 Ok(h) => h,
297 Err(e) => {
298 cleanup(&format!("sha256 read failed: {e}"));
299 return None;
300 }
301 };
302 if actual_hash != meta.archive_sha256 {
303 warn!(
304 "Binary cache SHA-256 mismatch for version {version} \
305 (expected {}, got {actual_hash}) — ignoring cache entry",
306 meta.archive_sha256
307 );
308 cleanup("sha256 mismatch");
309 return None;
310 }
311
312 // THE SECURITY GATE: re-verify the ML-DSA-65 signature over the
313 // PRIVATE archive copy on every hit. The returned path is this same
314 // private copy, so the caller extracts exactly the bytes that were
315 // verified — a cache entry tampered with on disk (binary/archive
316 // swap, forged metadata, or a post-verify swap attempt) cannot
317 // produce a private copy whose signature verifies against the
318 // pinned release key.
319 if let Err(e) = self.verify_archive(&private_archive, &private_sig) {
320 warn!(
321 "Cached archive for version {version} FAILED ML-DSA signature \
322 re-verification ({e}); discarding cache entry (possible \
323 on-disk tampering). A fresh verified download will run."
324 );
325 cleanup("signature re-verification failed");
326 return None;
327 }
328
329 debug!("Cached archive for version {version} passed ML-DSA re-verification");
330 Some(private_archive)
331 }
332
333 /// Store a signature-verified archive in the cache.
334 ///
335 /// Both files are persisted (via write-to-temp-then-rename so readers
336 /// never observe partial writes); the metadata file is written last so
337 /// [`get_verified_archive`](Self::get_verified_archive) only succeeds
338 /// once every file is complete.
339 ///
340 /// Defence in depth: this re-verifies the archive against its signature
341 /// before caching, so a poisoned entry cannot be created through the
342 /// supported path even if a caller forgot to verify first.
343 ///
344 /// # Errors
345 ///
346 /// Returns an error if the signature does not verify, the inputs cannot
347 /// be read, or the cache files cannot be written.
348 pub fn store_archive(
349 &self,
350 version: &str,
351 archive_path: &Path,
352 signature_path: &Path,
353 ) -> Result<()> {
354 // Defence in depth: refuse to persist a non-regular file, an
355 // oversize archive, or a misshapen signature — mirroring the
356 // `get_verified_archive` cache-hit policy. `symlink_metadata`
357 // refuses to chase a symlink the caller may have planted.
358 //
359 // Note the intentional asymmetry with `open_regular_capped`
360 // (which uses `fs::metadata` and DOES follow symlinks): on the
361 // store path the source file is supplied by the caller (typically
362 // a path under our control after download), so a symlink there is
363 // surprising and worth rejecting. On the read path the cache dir
364 // is shared and an attacker may have planted a symlink — but the
365 // attacker already has write access, so chasing a symlink-to-
366 // regular is no worse than them editing the regular file
367 // directly, while still letting the post-open `is_file()` reject
368 // symlink-to-special.
369 let archive_meta = fs::symlink_metadata(archive_path)?;
370 if !archive_meta.file_type().is_file() {
371 return Err(Error::Upgrade(format!(
372 "Refusing to cache archive for {version}: source is not a \
373 regular file (symlink/special)"
374 )));
375 }
376 let archive_len = archive_meta.len();
377 if archive_len > crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 {
378 return Err(Error::Upgrade(format!(
379 "Refusing to cache archive for {version}: size {archive_len} bytes \
380 exceeds MAX_ARCHIVE_SIZE_BYTES"
381 )));
382 }
383 let sig_meta = fs::symlink_metadata(signature_path)?;
384 if !sig_meta.file_type().is_file() {
385 return Err(Error::Upgrade(format!(
386 "Refusing to cache archive for {version}: signature is not a \
387 regular file (symlink/special)"
388 )));
389 }
390 let sig_len = sig_meta.len();
391 if sig_len != signature::SIGNATURE_SIZE as u64 {
392 return Err(Error::Upgrade(format!(
393 "Refusing to cache archive for {version}: signature size {sig_len} \
394 bytes, expected {}",
395 signature::SIGNATURE_SIZE
396 )));
397 }
398
399 self.verify_archive(archive_path, signature_path)
400 .map_err(|e| {
401 Error::Upgrade(format!(
402 "Refusing to cache archive for {version}: signature does not verify ({e})"
403 ))
404 })?;
405
406 let archive_hash = sha256_file(archive_path)?;
407
408 let dest_archive = self.cached_archive_path(version);
409 let dest_sig = self.cached_signature_path(version);
410 let meta_path = self.meta_path(version);
411
412 Self::atomic_copy(
413 archive_path,
414 &dest_archive,
415 &self
416 .cache_dir
417 .join(format!(".ant-node-{version}.archive.tmp")),
418 )?;
419 Self::atomic_copy(
420 signature_path,
421 &dest_sig,
422 &self.cache_dir.join(format!(".ant-node-{version}.sig.tmp")),
423 )?;
424
425 let now = std::time::SystemTime::now()
426 .duration_since(std::time::UNIX_EPOCH)
427 .map_err(|e| Error::Upgrade(format!("System clock error: {e}")))?
428 .as_secs();
429
430 let meta = CachedArchiveMeta {
431 version: version.to_string(),
432 archive_sha256: archive_hash,
433 cached_at_epoch_secs: now,
434 };
435
436 let meta_json = serde_json::to_string(&meta).map_err(|e| {
437 Error::Upgrade(format!("Failed to serialize cached archive metadata: {e}"))
438 })?;
439
440 // Metadata written last so a reader never sees a complete meta file
441 // pointing at an incomplete archive/signature pair.
442 let tmp_meta = self.cache_dir.join(format!(".ant-node-{version}.meta.tmp"));
443 let mut f = File::create(&tmp_meta)?;
444 f.write_all(meta_json.as_bytes())?;
445 f.sync_all()?;
446 drop(f);
447 let _ = fs::remove_file(&meta_path);
448 fs::rename(&tmp_meta, &meta_path)?;
449
450 debug!(
451 "Cached verified archive for version {version} at {}",
452 dest_archive.display()
453 );
454 Ok(())
455 }
456
457 /// Acquire an exclusive download lock and return the guard.
458 ///
459 /// This prevents multiple nodes from downloading the same archive
460 /// concurrently — the first acquires the lock and downloads, the rest
461 /// wait and then find the archive already cached.
462 ///
463 /// The lock is released when the returned guard is dropped.
464 ///
465 /// **Note:** `lock_exclusive()` blocks the calling thread. Callers in
466 /// async contexts should wrap this call in `tokio::task::spawn_blocking`.
467 ///
468 /// # Errors
469 ///
470 /// Returns an error if the lock file cannot be created or acquired.
471 pub fn acquire_download_lock(&self) -> Result<DownloadLockGuard> {
472 let lock_path = self.cache_dir.join("download.lock");
473 let lock = File::create(&lock_path)
474 .map_err(|e| Error::Upgrade(format!("Failed to create download lock: {e}")))?;
475 lock.lock_exclusive()
476 .map_err(|e| Error::Upgrade(format!("Failed to acquire download lock: {e}")))?;
477 Ok(DownloadLockGuard { _file: lock })
478 }
479
480 // -- private helpers -----------------------------------------------------
481
482 /// Copy `src` to `dest` atomically via a temp file + rename.
483 fn atomic_copy(src: &Path, dest: &Path, tmp: &Path) -> Result<()> {
484 fs::copy(src, tmp)?;
485 // Remove dest first on Windows where rename fails if it exists.
486 let _ = fs::remove_file(dest);
487 fs::rename(tmp, dest)?;
488 Ok(())
489 }
490
491 fn meta_path(&self, version: &str) -> PathBuf {
492 self.cache_dir.join(format!("ant-node-{version}.meta.json"))
493 }
494}
495
/// Guard that holds the exclusive download lock for as long as it lives.
///
/// The guard owns the locked file handle; dropping the guard closes the
/// handle, which releases the lock. Bind it to a named variable for the
/// duration of the download — discarding it immediately releases the lock
/// right away, which is why the type is `#[must_use]`.
#[derive(Debug)]
#[must_use = "the download lock is released as soon as the guard is dropped"]
pub struct DownloadLockGuard {
    /// Locked file handle; never read, kept only so the lock stays held.
    _file: File,
}
502
503/// Open `path` as a regular file with size at most `max_len`, validating
504/// the metadata on the **opened handle** so a race between any prior stat
505/// and the read cannot substitute a special file (FIFO/device/socket) or
506/// an oversized payload. A symlink whose target is a regular file is
507/// accepted (it's just an indirect path to a regular file — the attacker
508/// who placed the link already needed write access to the cache dir, the
509/// same access level as directly editing the regular file); a symlink
510/// whose target is a special file is rejected by the `is_file()` check on
511/// the opened handle.
512///
513/// On Unix, `open()` of a FIFO/named-pipe for reading blocks until a
514/// writer connects, so a cache-dir attacker could otherwise hang the
515/// upgrade indefinitely by planting a FIFO at the cache entry's path. We
516/// (a) reject non-regular files via a `fs::metadata()` pre-check (follows
517/// symlinks, so a symlink-to-regular is still accepted), and (b) on Unix
518/// also open with `O_NONBLOCK` as a belt-and-braces defence in case the
519/// pre-check races a swap. The post-open `is_file()` on the opened handle
520/// remains the TOCTOU-safe gate.
521///
522/// Returns `(File, len)` on success; the returned `File` is positioned at
523/// offset 0 and may be `io::copy`'d into a destination — callers should
524/// wrap with `Read::take(max_len)` so an attacker who extends the file
525/// after the metadata read cannot stream beyond the cap.
526fn open_regular_capped(path: &Path, max_len: u64) -> io::Result<(File, u64)> {
527 // Pre-check: refuse to even open a non-regular file. This is the
528 // first line of defence against an attacker who planted a FIFO at
529 // `path` — opening a FIFO for reading on Unix blocks until a writer
530 // connects, hanging the upgrade indefinitely. `fs::metadata` follows
531 // symlinks, so a symlink whose target is a regular file is accepted
532 // here and a symlink whose target is a FIFO/device/socket is rejected.
533 let pre_meta = fs::metadata(path)?;
534 if !pre_meta.file_type().is_file() {
535 return Err(io::Error::new(
536 io::ErrorKind::InvalidInput,
537 "not a regular file (FIFO/device/socket/dir)",
538 ));
539 }
540
541 // Belt-and-braces against a pre-check vs open() race: on Unix also
542 // open with O_NONBLOCK, so even if an attacker swaps the regular file
543 // for a FIFO between the metadata read and open(), the open() returns
544 // immediately instead of blocking on a writer. Reads on a regular file
545 // ignore O_NONBLOCK, so this is a no-op for the happy path. The
546 // post-open is_file() check below still catches the swap.
547 let file = {
548 let mut opts = OpenOptions::new();
549 opts.read(true);
550 #[cfg(unix)]
551 {
552 use std::os::unix::fs::OpenOptionsExt;
553 // `O_NONBLOCK` is per-arch on Linux (0o4000 on x86/arm/aarch64
554 // /riscv, 0o200 on mips, 0x4000 on sparc, etc.). Use `libc`
555 // so we always pick the right constant for the target arch
556 // instead of silently setting a different flag. Reads on a
557 // regular file ignore `O_NONBLOCK` on all our supported
558 // platforms, so this is a no-op for the happy path.
559 opts.custom_flags(libc::O_NONBLOCK);
560 }
561 opts.open(path)?
562 };
563 let meta = file.metadata()?;
564 if !meta.file_type().is_file() {
565 return Err(io::Error::new(
566 io::ErrorKind::InvalidInput,
567 "not a regular file (FIFO/device/socket/dir)",
568 ));
569 }
570 let len = meta.len();
571 if len > max_len {
572 return Err(io::Error::new(
573 io::ErrorKind::InvalidInput,
574 format!("file exceeds size cap ({len} > {max_len})"),
575 ));
576 }
577 Ok((file, len))
578}
579
580/// Compute the hex-encoded SHA-256 digest of a file.
581fn sha256_file(path: &Path) -> Result<String> {
582 let mut file = File::open(path)?;
583 let mut hasher = Sha256::new();
584 let mut buf = [0u8; 8192];
585 loop {
586 let n = file
587 .read(&mut buf)
588 .map_err(|e| Error::Upgrade(format!("Failed to read file for hashing: {e}")))?;
589 if n == 0 {
590 break;
591 }
592 hasher.update(&buf[..n]);
593 }
594 Ok(hex::encode(hasher.finalize()))
595}
596
597// ---------------------------------------------------------------------------
598// Tests
599// ---------------------------------------------------------------------------
600
601#[cfg(test)]
602#[allow(clippy::unwrap_used, clippy::expect_used)]
603mod tests {
604 use super::*;
605 use saorsa_pqc::api::sig::{ml_dsa_65, MlDsaPublicKey, MlDsaSecretKey};
606 use std::sync::OnceLock;
607 use tempfile::TempDir;
608
609 /// One generated keypair for the whole test module (keygen is expensive).
610 fn test_keypair() -> &'static (MlDsaPublicKey, MlDsaSecretKey) {
611 static KP: OnceLock<(MlDsaPublicKey, MlDsaSecretKey)> = OnceLock::new();
612 KP.get_or_init(|| ml_dsa_65().generate_keypair().unwrap())
613 }
614
615 fn cache_with_test_key(dir: &Path) -> BinaryCache {
616 BinaryCache::new_with_verify_key(dir.to_path_buf(), test_keypair().0.clone())
617 }
618
619 /// A caller-private staging directory (the per-upgrade temp dir in
620 /// production). Returned so it outlives the call.
621 fn priv_dir() -> TempDir {
622 TempDir::new().unwrap()
623 }
624
625 /// Write an archive + a valid detached signature over it.
626 fn make_signed_archive(dir: &Path, contents: &[u8]) -> (PathBuf, PathBuf) {
627 let archive = dir.join("src-archive");
628 fs::write(&archive, contents).unwrap();
629 let sig = ml_dsa_65()
630 .sign_with_context(&test_keypair().1, contents, signature::SIGNING_CONTEXT)
631 .unwrap();
632 let sig_path = dir.join("src-archive.sig");
633 fs::write(&sig_path, sig.to_bytes()).unwrap();
634 (archive, sig_path)
635 }
636
/// An empty cache directory yields no entry.
#[test]
fn test_miss_returns_none() {
    let cache_root = TempDir::new().unwrap();
    let staging = priv_dir();
    let cache = cache_with_test_key(cache_root.path());

    let lookup = cache.get_verified_archive("1.0.0", staging.path());
    assert!(lookup.is_none());
}
644
/// Round trip: a stored archive comes back as a verified private copy with
/// identical bytes.
#[test]
fn test_store_and_get_verified_archive() {
    let cache_root = TempDir::new().unwrap();
    let staging = priv_dir();
    let cache = cache_with_test_key(cache_root.path());

    let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"signed archive bytes");
    cache.store_archive("1.2.3", &src_archive, &src_sig).unwrap();

    let got = cache
        .get_verified_archive("1.2.3", staging.path())
        .expect("cache hit");
    let staged_bytes = fs::read(&got).unwrap();
    assert_eq!(staged_bytes, b"signed archive bytes");

    // What comes back must be the PRIVATE copy rather than the shared cache
    // file — that is what closes the verify/extract TOCTOU.
    assert!(
        got.starts_with(staging.path()),
        "returned archive must be the caller-private copy, got {got:?}"
    );
    let shared_path = cache.cached_archive_path("1.2.3");
    assert_ne!(got, shared_path);
}
666
/// `store_archive` refuses an archive whose signature does not verify, and
/// nothing becomes retrievable afterwards.
#[test]
fn test_store_rejects_unsigned_archive() {
    let cache_root = TempDir::new().unwrap();
    let staging = priv_dir();
    let cache = cache_with_test_key(cache_root.path());

    // Correctly sized signature file, but all zeroes.
    let unsigned = cache_root.path().join("a");
    let bogus_sig = cache_root.path().join("a.sig");
    fs::write(&unsigned, b"unsigned").unwrap();
    fs::write(&bogus_sig, vec![0u8; signature::SIGNATURE_SIZE]).unwrap();

    let stored = cache.store_archive("1.0.0", &unsigned, &bogus_sig);
    assert!(stored.is_err());
    assert!(cache.get_verified_archive("1.0.0", staging.path()).is_none());
}
681
/// Swapping the cached archive on disk — even together with a forged,
/// matching SHA-256 in the metadata — must not yield a trusted entry,
/// because the ML-DSA signature is re-verified on every hit.
#[test]
fn test_tampered_cached_archive_is_rejected() {
    let cache_root = TempDir::new().unwrap();
    let staging = priv_dir();
    let cache = cache_with_test_key(cache_root.path());

    let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"legit release archive");
    cache.store_archive("2.0.0", &src_archive, &src_sig).unwrap();
    assert!(cache.get_verified_archive("2.0.0", staging.path()).is_some());

    // Step 1 of the attack: replace the cached archive with a malicious
    // payload.
    fs::write(cache.cached_archive_path("2.0.0"), b"malicious payload").unwrap();

    // Step 2: forge the metadata so the SHA-256 corruption pre-check passes.
    let forged = CachedArchiveMeta {
        version: "2.0.0".to_string(),
        archive_sha256: hex::encode(Sha256::digest(b"malicious payload")),
        cached_at_epoch_secs: 0,
    };
    let forged_json = serde_json::to_string(&forged).unwrap();
    fs::write(cache.meta_path("2.0.0"), forged_json).unwrap();

    // The hash pre-check now passes, but the swapped archive fails ML-DSA
    // re-verification against the key, so the entry is rejected.
    let lookup = cache.get_verified_archive("2.0.0", staging.path());
    assert!(
        lookup.is_none(),
        "tampered cache entry must NOT be trusted even with a forged \
         matching SHA-256 — the signature gate runs on every hit"
    );
}
724
/// TOCTOU defence: overwriting the *shared* cache archive right after a hit
/// does not affect the path that was returned, because that path is a
/// caller-private copy holding the verified bytes.
#[test]
fn test_returned_archive_is_private_copy_immune_to_post_hit_swap() {
    let cache_root = TempDir::new().unwrap();
    let staging = priv_dir();
    let cache = cache_with_test_key(cache_root.path());

    let (src_archive, src_sig) =
        make_signed_archive(cache_root.path(), b"the real signed release");
    cache.store_archive("3.0.0", &src_archive, &src_sig).unwrap();

    let verified = cache
        .get_verified_archive("3.0.0", staging.path())
        .expect("cache hit");

    // The attacker overwrites the SHARED cache archive immediately after
    // verification.
    let shared_path = cache.cached_archive_path("3.0.0");
    fs::write(shared_path, b"post-verify malicious swap").unwrap();

    // The caller extracts from the private copy, which the swap cannot
    // reach.
    let extracted = fs::read(&verified).unwrap();
    assert_eq!(
        extracted,
        b"the real signed release",
        "extraction must read the verified private bytes, not the \
         attacker's post-verification swap"
    );
}
758
/// Deleting the cached signature must not let an entry skip verification.
#[test]
fn test_missing_signature_returns_none() {
    let cache_root = TempDir::new().unwrap();
    let staging = priv_dir();
    let cache = cache_with_test_key(cache_root.path());

    let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"data");
    cache.store_archive("1.0.0", &src_archive, &src_sig).unwrap();

    // The attacker removes the signature hoping verification is skipped.
    let cached_sig = cache.cached_signature_path("1.0.0");
    fs::remove_file(cached_sig).unwrap();

    let lookup = cache.get_verified_archive("1.0.0", staging.path());
    assert!(lookup.is_none());
}
772
/// An entry without its metadata sidecar is treated as a miss.
#[test]
fn test_missing_meta_returns_none() {
    let cache_root = TempDir::new().unwrap();
    let staging = priv_dir();
    let cache = cache_with_test_key(cache_root.path());

    let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"data");
    cache.store_archive("1.0.0", &src_archive, &src_sig).unwrap();

    let sidecar = cache.meta_path("1.0.0");
    fs::remove_file(sidecar).unwrap();

    let lookup = cache.get_verified_archive("1.0.0", staging.path());
    assert!(lookup.is_none());
}
783
/// Size gate on cache hits: someone able to write into the cache directory
/// must not be able to exhaust memory or disk in the verifier by planting a
/// multi-GB archive. `get_verified_archive` checks the cached archive's
/// size against `MAX_ARCHIVE_SIZE_BYTES` BEFORE any copy or `fs::read`
/// reaches `signature::verify_from_file`.
#[test]
fn test_oversize_cached_archive_is_rejected_before_copy() {
    let shared_dir = TempDir::new().unwrap();
    let cache = cache_with_test_key(shared_dir.path());
    let private_dir = priv_dir();

    // Start from a genuine signed entry so the metadata and signature
    // clear the checks that run ahead of the archive size gate.
    let (archive_path, sig_path) = make_signed_archive(shared_dir.path(), b"legit");
    cache
        .store_archive("3.1.0", &archive_path, &sig_path)
        .unwrap();

    // Recreate the cached archive one byte over the limit. The file handle
    // is a temporary, so it is closed before the lookup below runs.
    let over_limit = crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1;
    let shared_entry = cache.cached_archive_path("3.1.0");
    File::create(&shared_entry)
        .unwrap()
        .set_len(over_limit)
        .unwrap();

    let lookup = cache.get_verified_archive("3.1.0", private_dir.path());
    assert!(lookup.is_none());

    // Rejection has to happen before staging: no private copy may exist.
    let staged = private_dir.path().join("cached-3.1.0.archive");
    assert!(
        !staged.exists(),
        "oversize entry must NOT be staged into private dir"
    );
}
813
/// A cached signature file of the wrong length must turn the lookup into a
/// miss, and — as the test name promises — the rejection must happen before
/// the archive is staged into the caller's private directory.
#[test]
fn test_wrong_size_signature_is_rejected_before_copy() {
    let tmp = TempDir::new().unwrap();
    let cache = cache_with_test_key(tmp.path());
    let pd = priv_dir();

    let (archive, sig) = make_signed_archive(tmp.path(), b"legit");
    cache.store_archive("3.2.0", &archive, &sig).unwrap();
    // Replace the cached signature with one of the wrong size.
    fs::write(cache.cached_signature_path("3.2.0"), b"too-short").unwrap();

    assert!(cache.get_verified_archive("3.2.0", pd.path()).is_none());

    // Check the "before copy" half of the contract as well, using the same
    // `cached-<version>.archive` staging name the oversize and symlink
    // tests in this module assert on.
    let private_archive = pd.path().join("cached-3.2.0.archive");
    assert!(
        !private_archive.exists(),
        "wrong-size signature entry must NOT be staged into private dir"
    );
}
827
/// The write side enforces the size cap too: `store_archive` refuses to
/// persist an archive larger than the limit, even if a (hypothetically)
/// misbehaving caller got past the download-time size cap.
#[test]
fn test_store_archive_rejects_oversize() {
    let work_dir = TempDir::new().unwrap();
    let cache = cache_with_test_key(work_dir.path());

    // An "archive" whose length is one byte past the limit, created via
    // `set_len` so no real data has to be written.
    let over_limit = crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1;
    let oversize_archive = work_dir.path().join("big.archive");
    File::create(&oversize_archive)
        .unwrap()
        .set_len(over_limit)
        .unwrap();

    // Any signature-sized blob will do.
    let placeholder_sig = work_dir.path().join("any.sig");
    fs::write(&placeholder_sig, vec![0u8; signature::SIGNATURE_SIZE]).unwrap();

    let outcome = cache.store_archive("9.9.9", &oversize_archive, &placeholder_sig);
    assert!(outcome.is_err());
}
848
/// Round-3 regression: the size gate cannot be sidestepped by a cache-dir
/// writer who plants a symlink whose `stat(2)` size is small but whose
/// target reads indefinitely (e.g. `/dev/zero`). `symlink_metadata` +
/// `is_file()` rejects the entry before any `fs::copy` reads it.
#[cfg(unix)]
#[test]
fn test_symlink_cached_archive_is_rejected_before_copy() {
    let shared_dir = TempDir::new().unwrap();
    let cache = cache_with_test_key(shared_dir.path());
    let private_dir = priv_dir();

    // Begin with a legitimate signed entry so meta/version/sig-size pass.
    let (archive_path, sig_path) = make_signed_archive(shared_dir.path(), b"legit");
    cache
        .store_archive("4.0.0", &archive_path, &sig_path)
        .unwrap();

    // Swap the cached archive for a symlink pointing at /dev/zero.
    let shared_entry = cache.cached_archive_path("4.0.0");
    fs::remove_file(&shared_entry).unwrap();
    std::os::unix::fs::symlink("/dev/zero", &shared_entry).unwrap();

    let lookup = cache.get_verified_archive("4.0.0", private_dir.path());
    assert!(
        lookup.is_none(),
        "a symlinked cached archive must be rejected pre-copy, \
         not chased into /dev/zero"
    );

    // No private copy may have been staged.
    let staged = private_dir.path().join("cached-4.0.0.archive");
    assert!(!staged.exists());
}
876
/// `.meta.json` goes through the same size/file-type gate as the archive
/// and signature: a planted multi-MB metadata file (or a metadata symlink
/// to a special file) is rejected pre-parse, without risking a hang or a
/// large allocation.
#[test]
fn test_oversized_meta_is_rejected() {
    let shared_dir = TempDir::new().unwrap();
    let cache = cache_with_test_key(shared_dir.path());
    let private_dir = priv_dir();

    // A valid entry first, so the archive and signature are well-formed.
    let (archive_path, sig_path) = make_signed_archive(shared_dir.path(), b"legit");
    cache
        .store_archive("5.0.0", &archive_path, &sig_path)
        .unwrap();

    // Replace the metadata with garbage well above MAX_META_BYTES.
    let garbage = vec![b'a'; usize::try_from(MAX_META_BYTES).unwrap_or(usize::MAX) + 1024];
    let cached_meta = cache.meta_path("5.0.0");
    fs::write(&cached_meta, &garbage).unwrap();

    let lookup = cache.get_verified_archive("5.0.0", private_dir.path());
    assert!(
        lookup.is_none(),
        "oversized metadata file must be rejected before parsing"
    );
}
901
/// Replacing the cached archive with a FIFO must not let a cache-dir
/// attacker hang `get_verified_archive` while it waits for a writer to
/// connect. The pre-check + `O_NONBLOCK` belt-and-braces returns
/// immediately with an error, the cache hit is abandoned, and the caller
/// falls back to a fresh verified download.
#[cfg(unix)]
#[test]
fn test_fifo_cached_archive_does_not_hang() {
    use std::time::{Duration, Instant};

    let shared_dir = TempDir::new().unwrap();
    let cache = cache_with_test_key(shared_dir.path());
    let private_dir = priv_dir();

    // Store a legitimate signed entry so meta/version/sig-size are good,
    // then remove the cached archive so a FIFO can take its place. Without
    // the pre-check + O_NONBLOCK, opening that FIFO for reading would block
    // until a writer connected.
    let (archive_path, sig_path) = make_signed_archive(shared_dir.path(), b"legit");
    cache
        .store_archive("6.0.0", &archive_path, &sig_path)
        .unwrap();
    let fifo_path = cache.cached_archive_path("6.0.0");
    fs::remove_file(&fifo_path).unwrap();

    // libc::mkfifo is called directly (rather than shelling out) so that a
    // CI image without coreutils cannot silently skip this test and hide a
    // packaging regression. `mkfifo(2)` takes a NUL-terminated path and
    // returns 0 on success, -1 on error with errno set.
    let fifo_cpath = std::ffi::CString::new(fifo_path.as_os_str().as_encoded_bytes()).unwrap();
    // SAFETY: `fifo_cpath` is a live, NUL-terminated `CString` for the whole
    // call, and the unsafe block is limited to this single FFI call.
    #[allow(unsafe_code)]
    let status = unsafe { libc::mkfifo(fifo_cpath.as_ptr(), 0o600) };
    assert_eq!(
        status,
        0,
        "mkfifo failed: {}",
        std::io::Error::last_os_error()
    );

    // Time only the cache-hit path, so cold-process startup or unrelated
    // test parallelism cannot blow the budget.
    let started = Instant::now();
    let lookup = cache.get_verified_archive("6.0.0", private_dir.path());
    let elapsed = started.elapsed();

    assert!(
        lookup.is_none(),
        "a FIFO planted at the cached archive path must be rejected"
    );
    // 5s leaves generous headroom on a contended CI macOS runner while
    // still catching a real "open is blocking on the FIFO".
    let budget = Duration::from_secs(5);
    assert!(
        elapsed < budget,
        "open of FIFO returned in {elapsed:?}, expected ≪ 5s — \
         pre-check or O_NONBLOCK is not catching this"
    );

    // No private copy may have been staged.
    let staged = private_dir.path().join("cached-6.0.0.archive");
    assert!(!staged.exists());
}
954
/// A `.meta.json` planted as a symlink to a special file (e.g.
/// `/dev/zero`) is turned away by the open-handle file-type check, so the
/// read neither hangs nor OOMs.
#[cfg(unix)]
#[test]
fn test_meta_symlink_to_special_file_is_rejected() {
    let shared_dir = TempDir::new().unwrap();
    let cache = cache_with_test_key(shared_dir.path());
    let private_dir = priv_dir();

    let (archive_path, sig_path) = make_signed_archive(shared_dir.path(), b"legit");
    cache
        .store_archive("5.1.0", &archive_path, &sig_path)
        .unwrap();

    // Swap the real metadata file for a symlink to /dev/zero.
    let cached_meta = cache.meta_path("5.1.0");
    fs::remove_file(&cached_meta).unwrap();
    std::os::unix::fs::symlink("/dev/zero", &cached_meta).unwrap();

    let lookup = cache.get_verified_archive("5.1.0", private_dir.path());
    assert!(
        lookup.is_none(),
        "metadata symlink to a special file must be rejected"
    );
}
977}