ant-node 0.11.5

Pure quantum-proof network node for the Autonomi decentralized network
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
//! Disk cache for downloaded upgrade archives.
//!
//! When multiple ant-node instances detect the same upgrade, only the first
//! one needs to download the archive. `BinaryCache` stores the **signed
//! archive together with its detached ML-DSA-65 signature** so that
//! subsequent nodes can reuse it.
//!
//! ## Security model
//!
//! The ML-DSA-65 signature is the security gate, and it covers the *archive*
//! bytes — not the extracted binary. A previous version cached the extracted
//! binary and, on a cache hit, returned it after only a SHA-256 check against
//! a sibling metadata file. SHA-256 is not a security control: anyone able to
//! write to the shared cache directory (a co-located process, a shared
//! container volume, a low-privilege foothold) could replace the cached
//! binary and its `.meta.json` with a matching hash, and the next node would
//! execute it **without any signature verification** — persistent RCE.
//!
//! This module now caches the *archive + signature* and, on **every** cache
//! hit, re-runs ML-DSA-65 verification over the cached archive before it is
//! used. A tampered archive fails verification (the release key is pinned in
//! the binary and cannot be forged); a tampered or missing signature fails
//! likewise. The extracted binary is always derived fresh from the
//! just-verified archive by the caller, so a poisoned cache entry can never
//! be executed. The SHA-256 metadata is retained only as a fast corruption
//! pre-check, never as the trust decision.
//!
//! ## Residual: cache entries are not bound to a specific release version
//!
//! `signature::SIGNING_CONTEXT = "ant-node-release-v1"` is constant across
//! versions, so the ML-DSA signature attests to "this archive is a valid
//! ant-node release", not "this archive is release X.Y.Z". An attacker with
//! cache-dir write access who possesses any past validly-signed release can
//! plant it under a newer version's cache key; the next node performing
//! that upgrade accepts it and runs it as the newer version. Net effect:
//! forced downgrade or wrong-arch crash loop, not arbitrary RCE.
//!
//! This is out of scope of the cache-poisoning RCE class this module
//! addresses (which trusted SHA-256 alone on cache hits): the `cache_dir`
//! is `0o700` (defence in depth, see `cache_dir.rs`) and the attacker
//! already needs same-UID write to exploit this — they can replace the
//! running binary directly. Closing the gap properly requires upstream
//! release-signing changes (the signing context must include the version
//! string, e.g. `b"ant-node-release-v1:1.2.3"`) and is tracked as a
//! follow-up.

use crate::error::{Error, Result};
use crate::logging::{debug, warn};
use crate::upgrade::signature;
use fs2::FileExt;
use saorsa_pqc::api::sig::MlDsaPublicKey;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::fs::{self, File, OpenOptions};
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};

/// Maximum size, in bytes, accepted for the `.meta.json` sidecar.
///
/// A well-formed `CachedArchiveMeta` serialises to roughly 120 bytes, so the
/// 4 KiB cap is comfortably above any legitimate payload. It is also tight
/// enough that an attacker who plants an enormous metadata file (or a link
/// to an endless source such as `/dev/zero`) cannot turn the metadata read
/// into a hang or an out-of-memory condition.
///
/// Passed as the `max_len` argument of `open_regular_capped` when the
/// sidecar is opened during a cache lookup.
const MAX_META_BYTES: u64 = 4 * 1024;

/// On-disk cache for downloaded, signature-verified upgrade archives.
///
/// Each cached version is represented by three sibling files inside
/// `cache_dir`: `ant-node-{version}.archive`, `ant-node-{version}.sig` and
/// `ant-node-{version}.meta.json`. A `download.lock` file in the same
/// directory serialises downloads between nodes.
#[derive(Clone)]
pub struct BinaryCache {
    /// Directory that holds cached archives, signatures, and metadata.
    cache_dir: PathBuf,
    /// Verification key override. `None` in production → the pinned release
    /// key embedded in [`signature`] is used (the real, unforgeable gate).
    /// Only ever `Some` via the `#[cfg(test)]` constructor, so test builds
    /// can exercise the cache with a generated keypair without weakening the
    /// production trust anchor in any way.
    verify_key: Option<MlDsaPublicKey>,
}

/// Metadata written alongside each cached archive.
///
/// Serialised as JSON into the `ant-node-{version}.meta.json` sidecar.
///
/// The SHA-256 here is a fast integrity/corruption pre-check only. It is
/// **not** a security control: the ML-DSA-65 signature over the archive is
/// re-verified on every cache hit regardless of this value.
#[derive(Serialize, Deserialize)]
struct CachedArchiveMeta {
    /// Semantic version string (e.g. "1.2.3"). A lookup rejects the entry
    /// when this does not equal the version being requested.
    version: String,
    /// Hex-encoded SHA-256 digest of the cached archive (corruption check).
    archive_sha256: String,
    /// When the archive was cached (seconds since UNIX epoch). Recorded at
    /// store time; the lookup path in this module does not consult it.
    cached_at_epoch_secs: u64,
}

impl BinaryCache {
    /// Build a cache rooted at `cache_dir`.
    ///
    /// This is the production constructor: no verification-key override is
    /// installed, so cached archives are checked against the pinned release
    /// public key embedded in the binary.
    #[must_use]
    pub fn new(cache_dir: PathBuf) -> Self {
        // No override: verification falls through to the pinned release key.
        let verify_key = None;
        Self {
            cache_dir,
            verify_key,
        }
    }

    /// Build a cache rooted at `cache_dir` that verifies against `verify_key`
    /// rather than the pinned release key.
    ///
    /// Compiled only under `#[cfg(test)]`: it lets unit tests sign archives
    /// with a freshly generated keypair. The production trust anchor is not
    /// affected, because production builds never contain this constructor.
    #[cfg(test)]
    #[must_use]
    pub fn new_with_verify_key(cache_dir: PathBuf, verify_key: MlDsaPublicKey) -> Self {
        let verify_key = Some(verify_key);
        Self {
            cache_dir,
            verify_key,
        }
    }

    /// Location inside the cache directory where the archive for `version`
    /// is (or would be) stored.
    #[must_use]
    pub fn cached_archive_path(&self, version: &str) -> PathBuf {
        let archive_name = format!("ant-node-{version}.archive");
        self.cache_dir.join(archive_name)
    }

    /// Location inside the cache directory where the detached signature for
    /// `version` is (or would be) stored.
    #[must_use]
    fn cached_signature_path(&self, version: &str) -> PathBuf {
        let signature_name = format!("ant-node-{version}.sig");
        self.cache_dir.join(signature_name)
    }

    /// Verify `archive` against `sig` using the pinned release key in
    /// production, or the injected test key under `#[cfg(test)]`.
    fn verify_archive(&self, archive: &Path, sig: &Path) -> Result<()> {
        self.verify_key.as_ref().map_or_else(
            || signature::verify_from_file(archive, sig),
            |key| signature::verify_from_file_with_key(archive, sig, key),
        )
    }

    /// Copy the cached archive into the caller-private `private_dir`,
    /// **cryptographically re-verify that private copy**, and return its
    /// path — or `None` if there is no usable, trusted cache entry.
    ///
    /// On every call this:
    /// 1. loads the sibling metadata (size-capped on the opened handle and
    ///    read through a bounded reader) and checks the version matches,
    /// 2. copies the cached archive + signature into `private_dir` (a
    ///    location only this process writes, e.g. the per-upgrade temp dir),
    /// 3. SHA-256 pre-checks the private copy against the metadata (fast
    ///    corruption check), then
    /// 4. **re-verifies the ML-DSA-65 signature over the private copy** with
    ///    the pinned release key — the actual security gate.
    ///
    /// Verifying the *private copy* (not the shared cache file) closes the
    /// TOCTOU window: an attacker with write access to the shared cache dir
    /// cannot swap the bytes between verification and extraction, because the
    /// caller extracts from the returned private path, which is the exact
    /// byte sequence that was verified and is unreachable to the attacker.
    ///
    /// Any failure (missing/corrupt metadata, copy error, hash mismatch,
    /// missing signature, or — critically — a signature that does not verify
    /// against the pinned release key) returns `None`, forcing a fresh,
    /// fully verified download. Files staged into `private_dir` are removed
    /// again on every failure after staging has begun.
    ///
    /// The caller MUST extract the binary from the returned (private) archive
    /// path, so the executed bytes always derive from signature-verified
    /// input that no other principal could have modified post-verification.
    ///
    /// `private_dir` is a load-bearing security invariant: it MUST be a
    /// process-private, mode-`0o700` directory that no other principal
    /// can write to. The caller in `apply.rs` creates it via
    /// `tempfile::Builder::permissions(0o700).tempdir_in(binary_dir)` —
    /// any future caller MUST uphold the same invariant, otherwise the
    /// reopens by path in `sha256_file` and `verify_archive` would re-
    /// introduce a TOCTOU window.
    // The verifier-side cache-hit gate is read top-to-bottom by anyone
    // auditing the security model. Splitting it into smaller helpers just
    // to placate clippy's line limit would scatter the threat model across
    // call sites without improving safety.
    #[allow(clippy::too_many_lines)]
    #[must_use]
    pub fn get_verified_archive(&self, version: &str, private_dir: &Path) -> Option<PathBuf> {
        let cached_archive = self.cached_archive_path(version);
        let cached_sig = self.cached_signature_path(version);
        let meta_path = self.meta_path(version);

        // Read the metadata sidecar with a small, opened-handle size cap so
        // an attacker with cache-dir write cannot plant `meta.json` as a
        // symlink to `/dev/zero` (or any large/special file) and force a
        // hang/OOM here before the archive/sig hardening runs.
        let meta_data = {
            let (mut meta_file, meta_len) = match open_regular_capped(&meta_path, MAX_META_BYTES) {
                Ok(pair) => pair,
                Err(e) => {
                    debug!("Rejecting cache metadata for {version}: {e}");
                    return None;
                }
            };
            // `meta_len` is at most MAX_META_BYTES (4 KiB), so the conversion
            // cannot fail in practice. The fallback of 0 merely skips the
            // pre-allocation instead of requesting an impossible capacity.
            let cap = usize::try_from(meta_len).unwrap_or(0);
            let mut buf = String::with_capacity(cap);
            // Bound the read to the length validated on the opened handle:
            // the size check alone does not stop an attacker from appending
            // to the file afterwards, and an unbounded `read_to_string`
            // would then follow the file for as long as it keeps growing.
            if let Err(e) = (&mut meta_file).take(meta_len).read_to_string(&mut buf) {
                debug!("Failed to read cache metadata for {version}: {e}");
                return None;
            }
            buf
        };
        let meta: CachedArchiveMeta = match serde_json::from_str(&meta_data) {
            Ok(meta) => meta,
            Err(e) => {
                debug!("Ignoring unparsable cache metadata for {version}: {e}");
                return None;
            }
        };

        if meta.version != version {
            debug!("Binary cache version mismatch in metadata");
            return None;
        }

        // Open archive + signature ONCE each with size and file-type
        // validation on the opened handles. Everything staged into the
        // private dir is streamed from the FDs opened here — there is no
        // second path-based stat or open of the SHARED cache paths after
        // this point, so an attacker who races a swap on the cache-dir
        // paths (symlink, FIFO, device, oversized file) after these
        // validations cannot redirect what gets staged. (The later hash and
        // signature steps do reopen by path, but only the private copies.)
        //
        // Memory pressure note: `signature::verify_from_file*` reads the
        // archive into memory in full (it is the FIPS-204 verifier's
        // contract — message must be provided as a slice). `sha256_file`
        // streams in 8 KiB chunks and is not an OOM vector. The
        // `MAX_ARCHIVE_SIZE_BYTES` cap bounds the in-memory load and the
        // staging-dir disk footprint together.
        let (mut archive_file, archive_len) = match open_regular_capped(
            &cached_archive,
            crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64,
        ) {
            Ok(pair) => pair,
            Err(e) => {
                warn!("Rejecting cached archive for {version}: {e}");
                return None;
            }
        };
        let (mut sig_file, sig_len) =
            match open_regular_capped(&cached_sig, signature::SIGNATURE_SIZE as u64) {
                Ok(pair) => pair,
                Err(e) => {
                    warn!("Rejecting cached signature for {version}: {e}");
                    return None;
                }
            };
        if sig_len != signature::SIGNATURE_SIZE as u64 {
            // open_regular_capped enforces ≤ max; we additionally require
            // EXACTLY SIGNATURE_SIZE (a shorter sig is not valid ML-DSA-65).
            warn!(
                "Cached signature for {version} has wrong size ({sig_len} bytes, \
                 expected {})",
                signature::SIGNATURE_SIZE
            );
            return None;
        }

        // Stream the validated archive + signature into the caller-private
        // directory FROM THE ALREADY-OPEN HANDLES (not from the path), so
        // the bytes the verifier reads are the exact bytes the open-handle
        // metadata checks were performed against. `take()` is belt-and-
        // braces against an attacker who extends the file after open.
        let private_archive = private_dir.join(format!("cached-{version}.archive"));
        let private_sig = private_dir.join(format!("cached-{version}.sig"));

        // Best-effort removal of whatever has been staged so far; used on
        // every failure path below so no unverified bytes linger.
        let cleanup = |reason: &str| {
            debug!("Cleaning staged cache copy for {version}: {reason}");
            let _ = fs::remove_file(&private_archive);
            let _ = fs::remove_file(&private_sig);
        };

        if let Err(e) = (|| -> io::Result<()> {
            let mut dest = File::create(&private_archive)?;
            io::copy(&mut (&mut archive_file).take(archive_len), &mut dest)?;
            Ok(())
        })() {
            debug!("Could not stage cached archive for {version}: {e}");
            cleanup("archive copy failed");
            return None;
        }
        if let Err(e) = (|| -> io::Result<()> {
            let mut dest = File::create(&private_sig)?;
            io::copy(&mut (&mut sig_file).take(sig_len), &mut dest)?;
            Ok(())
        })() {
            debug!("Could not stage cached signature for {version}: {e}");
            cleanup("signature copy failed");
            return None;
        }

        // Fast corruption pre-check on the PRIVATE copy (NOT the security
        // decision). A copy error or truncation surfaces here.
        let actual_hash = match sha256_file(&private_archive) {
            Ok(h) => h,
            Err(e) => {
                cleanup(&format!("sha256 read failed: {e}"));
                return None;
            }
        };
        if actual_hash != meta.archive_sha256 {
            warn!(
                "Binary cache SHA-256 mismatch for version {version} \
                 (expected {}, got {actual_hash}) — ignoring cache entry",
                meta.archive_sha256
            );
            cleanup("sha256 mismatch");
            return None;
        }

        // THE SECURITY GATE: re-verify the ML-DSA-65 signature over the
        // PRIVATE archive copy on every hit. The returned path is this same
        // private copy, so the caller extracts exactly the bytes that were
        // verified — a cache entry tampered with on disk (binary/archive
        // swap, forged metadata, or a post-verify swap attempt) cannot
        // produce a private copy whose signature verifies against the
        // pinned release key.
        if let Err(e) = self.verify_archive(&private_archive, &private_sig) {
            warn!(
                "Cached archive for version {version} FAILED ML-DSA signature \
                 re-verification ({e}); discarding cache entry (possible \
                 on-disk tampering). A fresh verified download will run."
            );
            cleanup("signature re-verification failed");
            return None;
        }

        debug!("Cached archive for version {version} passed ML-DSA re-verification");
        Some(private_archive)
    }

    /// Store a signature-verified archive in the cache.
    ///
    /// `version` selects the cache key (it is embedded in the file names),
    /// `archive_path` is the archive to persist and `signature_path` its
    /// detached signature.
    ///
    /// Both files are persisted (via write-to-temp-then-rename so readers
    /// never observe partial writes); the metadata file is written last so
    /// [`get_verified_archive`](Self::get_verified_archive) only succeeds
    /// once every file is complete.
    ///
    /// Defence in depth: this re-verifies the archive against its signature
    /// before caching, so a poisoned entry cannot be created through the
    /// supported path even if a caller forgot to verify first.
    ///
    /// # Errors
    ///
    /// Returns an error if the signature does not verify, the inputs cannot
    /// be read, or the cache files cannot be written. An error is also
    /// returned when either input is not a regular file, when the archive
    /// exceeds `MAX_ARCHIVE_SIZE_BYTES`, when the signature is not exactly
    /// `SIGNATURE_SIZE` bytes long, or when the system clock reports a time
    /// before the UNIX epoch.
    pub fn store_archive(
        &self,
        version: &str,
        archive_path: &Path,
        signature_path: &Path,
    ) -> Result<()> {
        // Defence in depth: refuse to persist a non-regular file, an
        // oversize archive, or a misshapen signature — mirroring the
        // `get_verified_archive` cache-hit policy. `symlink_metadata`
        // refuses to chase a symlink the caller may have planted.
        //
        // Note the intentional asymmetry with `open_regular_capped`
        // (which uses `fs::metadata` and DOES follow symlinks): on the
        // store path the source file is supplied by the caller (typically
        // a path under our control after download), so a symlink there is
        // surprising and worth rejecting. On the read path the cache dir
        // is shared and an attacker may have planted a symlink — but the
        // attacker already has write access, so chasing a symlink-to-
        // regular is no worse than them editing the regular file
        // directly, while still letting the post-open `is_file()` reject
        // symlink-to-special.
        let archive_meta = fs::symlink_metadata(archive_path)?;
        if !archive_meta.file_type().is_file() {
            return Err(Error::Upgrade(format!(
                "Refusing to cache archive for {version}: source is not a \
                 regular file (symlink/special)"
            )));
        }
        let archive_len = archive_meta.len();
        if archive_len > crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 {
            return Err(Error::Upgrade(format!(
                "Refusing to cache archive for {version}: size {archive_len} bytes \
                 exceeds MAX_ARCHIVE_SIZE_BYTES"
            )));
        }
        // Same shape checks for the detached signature; its length must be
        // exact rather than merely bounded.
        let sig_meta = fs::symlink_metadata(signature_path)?;
        if !sig_meta.file_type().is_file() {
            return Err(Error::Upgrade(format!(
                "Refusing to cache archive for {version}: signature is not a \
                 regular file (symlink/special)"
            )));
        }
        let sig_len = sig_meta.len();
        if sig_len != signature::SIGNATURE_SIZE as u64 {
            return Err(Error::Upgrade(format!(
                "Refusing to cache archive for {version}: signature size {sig_len} \
                 bytes, expected {}",
                signature::SIGNATURE_SIZE
            )));
        }

        // Cryptographic gate: nothing is written to the cache directory
        // unless the signature verifies over the source archive.
        self.verify_archive(archive_path, signature_path)
            .map_err(|e| {
                Error::Upgrade(format!(
                    "Refusing to cache archive for {version}: signature does not verify ({e})"
                ))
            })?;

        // Digest recorded in the sidecar; lookups use it as a corruption
        // pre-check only.
        let archive_hash = sha256_file(archive_path)?;

        let dest_archive = self.cached_archive_path(version);
        let dest_sig = self.cached_signature_path(version);
        let meta_path = self.meta_path(version);

        // Publish the archive first, then the signature, each through its
        // own hidden temp file inside the cache directory.
        Self::atomic_copy(
            archive_path,
            &dest_archive,
            &self
                .cache_dir
                .join(format!(".ant-node-{version}.archive.tmp")),
        )?;
        Self::atomic_copy(
            signature_path,
            &dest_sig,
            &self.cache_dir.join(format!(".ant-node-{version}.sig.tmp")),
        )?;

        // Timestamp for the sidecar; fails only if the clock reads earlier
        // than the UNIX epoch.
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_err(|e| Error::Upgrade(format!("System clock error: {e}")))?
            .as_secs();

        let meta = CachedArchiveMeta {
            version: version.to_string(),
            archive_sha256: archive_hash,
            cached_at_epoch_secs: now,
        };

        let meta_json = serde_json::to_string(&meta).map_err(|e| {
            Error::Upgrade(format!("Failed to serialize cached archive metadata: {e}"))
        })?;

        // Metadata written last so a reader never sees a complete meta file
        // pointing at an incomplete archive/signature pair.
        let tmp_meta = self.cache_dir.join(format!(".ant-node-{version}.meta.tmp"));
        let mut f = File::create(&tmp_meta)?;
        f.write_all(meta_json.as_bytes())?;
        // Flush the sidecar to disk and close the handle before renaming it
        // into place.
        f.sync_all()?;
        drop(f);
        // Best-effort removal of any previous sidecar; the result is
        // deliberately ignored (the file may simply not exist yet).
        let _ = fs::remove_file(&meta_path);
        fs::rename(&tmp_meta, &meta_path)?;

        debug!(
            "Cached verified archive for version {version} at {}",
            dest_archive.display()
        );
        Ok(())
    }

    /// Acquire an exclusive download lock and return the guard.
    ///
    /// This prevents multiple nodes from downloading the same archive
    /// concurrently — the first acquires the lock and downloads, the rest
    /// wait and then find the archive already cached.
    ///
    /// The lock is released when the returned guard is dropped.
    ///
    /// **Note:** `lock_exclusive()` blocks the calling thread. Callers in
    /// async contexts should wrap this call in `tokio::task::spawn_blocking`.
    ///
    /// # Errors
    ///
    /// Returns an error if the lock file cannot be created or acquired.
    pub fn acquire_download_lock(&self) -> Result<DownloadLockGuard> {
        let lock_path = self.cache_dir.join("download.lock");
        let lock = File::create(&lock_path)
            .map_err(|e| Error::Upgrade(format!("Failed to create download lock: {e}")))?;
        lock.lock_exclusive()
            .map_err(|e| Error::Upgrade(format!("Failed to acquire download lock: {e}")))?;
        Ok(DownloadLockGuard { _file: lock })
    }

    // -- private helpers -----------------------------------------------------

    /// Copy `src` to `dest` atomically via a temp file + rename.
    fn atomic_copy(src: &Path, dest: &Path, tmp: &Path) -> Result<()> {
        fs::copy(src, tmp)?;
        // Remove dest first on Windows where rename fails if it exists.
        let _ = fs::remove_file(dest);
        fs::rename(tmp, dest)?;
        Ok(())
    }

    /// Location inside the cache directory of the JSON metadata sidecar for
    /// `version`.
    fn meta_path(&self, version: &str) -> PathBuf {
        let sidecar_name = format!("ant-node-{version}.meta.json");
        self.cache_dir.join(sidecar_name)
    }
}

/// RAII guard that holds an exclusive download lock.
///
/// The underlying file lock is released when this guard is dropped.
/// Obtained from `BinaryCache::acquire_download_lock`.
pub struct DownloadLockGuard {
    /// Open handle to the lock file. It is never read; it is stored only so
    /// that it lives exactly as long as the guard does.
    _file: File,
}

/// Open `path` for reading, insisting that it is a regular file no larger
/// than `max_len` bytes.
///
/// The checks run in three stages:
///
/// 1. **Path pre-check.** `fs::metadata` (which follows symlinks) must report
///    a regular file. This keeps us from even attempting to open a planted
///    FIFO, whose `open()` for reading would block on Unix until a writer
///    appears and so hang the upgrade.
/// 2. **Non-blocking open (Unix only).** The file is opened with
///    `O_NONBLOCK`, so that if the path is swapped for a FIFO between the
///    pre-check and the open, the open still returns immediately.
/// 3. **Handle check.** File type and length are read from the *opened
///    handle*. This is the race-free gate: whatever was opened is what gets
///    validated, regardless of later changes to the path.
///
/// A symlink that resolves to a regular file is accepted — planting it
/// requires the same cache-dir write access as editing the file directly —
/// while a symlink to a FIFO/device/socket/directory is rejected.
///
/// On success returns the open file, positioned at offset 0, together with
/// its length at the time of the handle check. Callers should read it through
/// `Read::take` so that a file extended afterwards cannot be streamed past
/// the cap.
///
/// # Errors
///
/// Returns `InvalidInput` when the target is not a regular file or is larger
/// than `max_len`; any I/O error from the metadata lookups or the open is
/// passed through unchanged.
fn open_regular_capped(path: &Path, max_len: u64) -> io::Result<(File, u64)> {
    // Both type checks report the same error, so build it in one place.
    let not_regular = || {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            "not a regular file (FIFO/device/socket/dir)",
        )
    };

    // Stage 1: path-based pre-check (follows symlinks).
    if !fs::metadata(path)?.file_type().is_file() {
        return Err(not_regular());
    }

    // Stage 2: open read-only; on Unix also non-blocking.
    let mut open_opts = OpenOptions::new();
    open_opts.read(true);
    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        // The numeric value of `O_NONBLOCK` differs between architectures,
        // so take it from `libc` rather than hard-coding it. Reads on a
        // regular file ignore the flag, which makes it harmless on the
        // happy path.
        open_opts.custom_flags(libc::O_NONBLOCK);
    }
    let handle = open_opts.open(path)?;

    // Stage 3: validate what was actually opened.
    let opened_meta = handle.metadata()?;
    if !opened_meta.file_type().is_file() {
        return Err(not_regular());
    }
    match opened_meta.len() {
        len if len > max_len => Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("file exceeds size cap ({len} > {max_len})"),
        )),
        len => Ok((handle, len)),
    }
}

/// Compute the hex-encoded SHA-256 digest of a file.
fn sha256_file(path: &Path) -> Result<String> {
    let mut file = File::open(path)?;
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 8192];
    loop {
        let n = file
            .read(&mut buf)
            .map_err(|e| Error::Upgrade(format!("Failed to read file for hashing: {e}")))?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }
    Ok(hex::encode(hasher.finalize()))
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use saorsa_pqc::api::sig::{ml_dsa_65, MlDsaPublicKey, MlDsaSecretKey};
    use std::sync::OnceLock;
    use tempfile::TempDir;

    /// Shared ML-DSA-65 keypair for every test in this module. It is
    /// generated lazily on first use and then reused, because key
    /// generation is expensive.
    fn test_keypair() -> &'static (MlDsaPublicKey, MlDsaSecretKey) {
        static SHARED_KEYPAIR: OnceLock<(MlDsaPublicKey, MlDsaSecretKey)> = OnceLock::new();
        SHARED_KEYPAIR.get_or_init(|| {
            let generated = ml_dsa_65().generate_keypair();
            generated.unwrap()
        })
    }

    /// Build a `BinaryCache` rooted at `dir` that verifies against the
    /// public half of the module-wide test keypair.
    fn cache_with_test_key(dir: &Path) -> BinaryCache {
        let (verify_key, _) = test_keypair();
        let cache_root = dir.to_path_buf();
        BinaryCache::new_with_verify_key(cache_root, verify_key.clone())
    }

    /// Create a fresh temporary directory standing in for the caller-private
    /// staging directory (the per-upgrade temp dir in production). The
    /// `TempDir` guard is handed back so the directory outlives this call.
    fn priv_dir() -> TempDir {
        let staging = TempDir::new();
        staging.unwrap()
    }

    /// Write `contents` to `dir/src-archive` and a valid detached signature
    /// over those bytes (made with the test secret key and the crate's
    /// signing context) to `dir/src-archive.sig`.
    ///
    /// Returns `(archive_path, signature_path)`.
    fn make_signed_archive(dir: &Path, contents: &[u8]) -> (PathBuf, PathBuf) {
        let archive_path = dir.join("src-archive");
        fs::write(&archive_path, contents).unwrap();

        let (_, secret_key) = test_keypair();
        let detached = ml_dsa_65()
            .sign_with_context(secret_key, contents, signature::SIGNING_CONTEXT)
            .unwrap();

        let signature_path = dir.join("src-archive.sig");
        fs::write(&signature_path, detached.to_bytes()).unwrap();

        (archive_path, signature_path)
    }

    /// Looking up a version that was never stored yields `None`.
    #[test]
    fn test_miss_returns_none() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        let lookup = cache.get_verified_archive("1.0.0", staging.path());
        assert!(lookup.is_none());
    }

    /// Round trip: a correctly signed archive can be stored and then
    /// fetched, and the fetched path is a copy inside the caller-private
    /// directory rather than the shared cache file.
    #[test]
    fn test_store_and_get_verified_archive() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        let (src_archive, src_sig) =
            make_signed_archive(cache_root.path(), b"signed archive bytes");
        cache.store_archive("1.2.3", &src_archive, &src_sig).unwrap();

        let hit = cache.get_verified_archive("1.2.3", staging.path());
        let got = hit.expect("cache hit");

        let staged_bytes = fs::read(&got).unwrap();
        assert_eq!(staged_bytes, b"signed archive bytes");

        // Handing back the PRIVATE copy (not the shared cache file) is what
        // closes the verify/extract TOCTOU window.
        let is_private = got.starts_with(staging.path());
        assert!(
            is_private,
            "returned archive must be the caller-private copy, got {got:?}"
        );
        let shared_path = cache.cached_archive_path("1.2.3");
        assert_ne!(got, shared_path);
    }

    /// `store_archive` refuses an archive whose accompanying signature is
    /// just zero bytes of the right length, and no entry becomes
    /// retrievable for that version afterwards.
    #[test]
    fn test_store_rejects_unsigned_archive() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        let payload_path = cache_root.path().join("a");
        fs::write(&payload_path, b"unsigned").unwrap();

        let zero_sig_path = cache_root.path().join("a.sig");
        let zeroed = vec![0u8; signature::SIGNATURE_SIZE];
        fs::write(&zero_sig_path, zeroed).unwrap();

        let stored = cache.store_archive("1.0.0", &payload_path, &zero_sig_path);
        assert!(stored.is_err());

        let lookup = cache.get_verified_archive("1.0.0", staging.path());
        assert!(lookup.is_none());
    }

    /// Swapping the cached archive on disk must not yield a trusted hit,
    /// even when the attacker also rewrites the metadata so its SHA-256
    /// matches the swapped bytes: the replacement has no valid ML-DSA
    /// signature under the test key.
    #[test]
    fn test_tampered_cached_archive_is_rejected() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        let (src_archive, src_sig) =
            make_signed_archive(cache_root.path(), b"legit release archive");
        cache.store_archive("2.0.0", &src_archive, &src_sig).unwrap();

        // Sanity check: the untouched entry is a hit.
        let first_lookup = cache.get_verified_archive("2.0.0", staging.path());
        assert!(first_lookup.is_some());

        // Step 1 of the attack: overwrite the cached archive in place.
        let evil_bytes: &[u8] = b"malicious payload";
        let shared_archive = cache.cached_archive_path("2.0.0");
        fs::write(&shared_archive, evil_bytes).unwrap();

        // Step 2: forge metadata whose SHA-256 matches the malicious bytes,
        // so a hash-only corruption pre-check would be satisfied.
        let mut hasher = Sha256::new();
        hasher.update(evil_bytes);
        let forged_hash = hex::encode(hasher.finalize());

        let forged_meta = CachedArchiveMeta {
            version: String::from("2.0.0"),
            archive_sha256: forged_hash,
            cached_at_epoch_secs: 0,
        };
        let forged_json = serde_json::to_string(&forged_meta).unwrap();
        fs::write(cache.meta_path("2.0.0"), forged_json).unwrap();

        // The hash now agrees with the swapped archive, but the signature
        // does not — the entry has to be refused.
        let second_lookup = cache.get_verified_archive("2.0.0", staging.path());
        assert!(
            second_lookup.is_none(),
            "tampered cache entry must NOT be trusted even with a forged \
             matching SHA-256 — the signature gate runs on every hit"
        );
    }

    /// TOCTOU defence: overwriting the *shared* cache archive right after a
    /// hit does not change the bytes at the path that was returned, because
    /// that path is a caller-private copy of the verified archive.
    #[test]
    fn test_returned_archive_is_private_copy_immune_to_post_hit_swap() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        let (src_archive, src_sig) =
            make_signed_archive(cache_root.path(), b"the real signed release");
        cache.store_archive("3.0.0", &src_archive, &src_sig).unwrap();

        let hit = cache.get_verified_archive("3.0.0", staging.path());
        let verified = hit.expect("cache hit");

        // Simulate the attacker replacing the SHARED cache file once
        // verification has already completed.
        let shared_archive = cache.cached_archive_path("3.0.0");
        fs::write(shared_archive, b"post-verify malicious swap").unwrap();

        // The caller extracts from `verified`, which must still hold the
        // originally verified bytes.
        let private_bytes = fs::read(&verified).unwrap();
        assert_eq!(
            private_bytes,
            b"the real signed release",
            "extraction must read the verified private bytes, not the \
             attacker's post-verification swap"
        );
    }

    /// Deleting the cached signature file turns the entry into a miss.
    #[test]
    fn test_missing_signature_returns_none() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"data");
        cache.store_archive("1.0.0", &src_archive, &src_sig).unwrap();

        // An attacker removes the signature hoping verification is skipped.
        let cached_sig = cache.cached_signature_path("1.0.0");
        fs::remove_file(cached_sig).unwrap();

        let lookup = cache.get_verified_archive("1.0.0", staging.path());
        assert!(lookup.is_none());
    }

    /// Deleting the cached metadata file turns the entry into a miss.
    #[test]
    fn test_missing_meta_returns_none() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"data");
        cache.store_archive("1.0.0", &src_archive, &src_sig).unwrap();

        let cached_meta = cache.meta_path("1.0.0");
        fs::remove_file(cached_meta).unwrap();

        let lookup = cache.get_verified_archive("1.0.0", staging.path());
        assert!(lookup.is_none());
    }

    /// Size policy: a cached archive that has been grown one byte past
    /// `MAX_ARCHIVE_SIZE_BYTES` is refused by `get_verified_archive`, and
    /// no private copy of it is left in the caller's staging directory.
    #[test]
    fn test_oversize_cached_archive_is_rejected_before_copy() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        // Start from a genuine signed entry so the metadata and signature
        // are well-formed…
        let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"legit");
        cache.store_archive("3.1.0", &src_archive, &src_sig).unwrap();

        // …then recreate the cached archive and extend it past the limit.
        // The temporary file handle is closed at the end of the statement.
        let oversize = crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1;
        let shared_archive = cache.cached_archive_path("3.1.0");
        File::create(&shared_archive)
            .unwrap()
            .set_len(oversize)
            .unwrap();

        // Rejected, and nothing staged into the private directory.
        let lookup = cache.get_verified_archive("3.1.0", staging.path());
        assert!(lookup.is_none());

        let private_archive = staging.path().join("cached-3.1.0.archive");
        let was_staged = private_archive.exists();
        assert!(
            !was_staged,
            "oversize entry must NOT be staged into private dir"
        );
    }

    /// A cached signature of the wrong length makes the entry a miss, and —
    /// as the test name promises — no private archive copy is left in the
    /// caller's staging directory for that version.
    #[test]
    fn test_wrong_size_signature_is_rejected_before_copy() {
        let tmp = TempDir::new().unwrap();
        let cache = cache_with_test_key(tmp.path());
        let pd = priv_dir();

        let (archive, sig) = make_signed_archive(tmp.path(), b"legit");
        cache.store_archive("3.2.0", &archive, &sig).unwrap();
        // Replace the cached signature with the wrong size.
        fs::write(cache.cached_signature_path("3.2.0"), b"too-short").unwrap();

        assert!(cache.get_verified_archive("3.2.0", pd.path()).is_none());
        // Pin the "before copy" half of the contract, using the same
        // private-copy file name the other pre-copy rejection tests check.
        assert!(
            !pd.path().join("cached-3.2.0.archive").exists(),
            "entry with a wrong-size signature must NOT be staged into private dir"
        );
    }

    /// `store_archive` returns an error for an archive one byte larger than
    /// `MAX_ARCHIVE_SIZE_BYTES`, so a caller that skipped the download-time
    /// size cap still cannot persist an oversize file.
    #[test]
    fn test_store_archive_rejects_oversize() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());

        // `set_len` extends the file to the target length without this
        // test writing that much data. The handle is closed at the end of
        // the statement.
        let big_archive = cache_root.path().join("big.archive");
        File::create(&big_archive)
            .unwrap()
            .set_len(crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1)
            .unwrap();

        // Any correctly sized signature will do for this test.
        let placeholder_sig = cache_root.path().join("any.sig");
        let zeroed = vec![0u8; signature::SIGNATURE_SIZE];
        fs::write(&placeholder_sig, zeroed).unwrap();

        let outcome = cache.store_archive("9.9.9", &big_archive, &placeholder_sig);
        assert!(outcome.is_err());
    }

    /// Round-3 regression: replacing the cached archive with a symlink to
    /// `/dev/zero` (small apparent size, endless reads) must produce a miss
    /// and leave nothing staged in the caller-private directory.
    #[cfg(unix)]
    #[test]
    fn test_symlink_cached_archive_is_rejected_before_copy() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        // Begin with a genuine signed entry so the metadata and signature
        // are well-formed…
        let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"legit");
        cache.store_archive("4.0.0", &src_archive, &src_sig).unwrap();

        // …then swap the cached archive for a symlink to /dev/zero.
        let shared_archive = cache.cached_archive_path("4.0.0");
        fs::remove_file(&shared_archive).unwrap();
        std::os::unix::fs::symlink("/dev/zero", &shared_archive).unwrap();

        let lookup = cache.get_verified_archive("4.0.0", staging.path());
        assert!(
            lookup.is_none(),
            "a symlinked cached archive must be rejected pre-copy, \
             not chased into /dev/zero"
        );

        // No private copy may exist after the rejection.
        let private_archive = staging.path().join("cached-4.0.0.archive");
        assert!(!private_archive.exists());
    }

    /// A `.meta.json` file that is 1 KiB larger than `MAX_META_BYTES` and
    /// filled with garbage causes `get_verified_archive` to report a miss.
    #[test]
    fn test_oversized_meta_is_rejected() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        // Store a valid entry first so the archive and signature are fine.
        let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"legit");
        cache.store_archive("5.0.0", &src_archive, &src_sig).unwrap();

        // Replace the metadata with a blob well above the metadata cap.
        let meta_cap = usize::try_from(MAX_META_BYTES).unwrap_or(usize::MAX);
        let garbage = vec![b'a'; meta_cap + 1024];
        let cached_meta = cache.meta_path("5.0.0");
        fs::write(&cached_meta, &garbage).unwrap();

        let lookup = cache.get_verified_archive("5.0.0", staging.path());
        assert!(
            lookup.is_none(),
            "oversized metadata file must be rejected before parsing"
        );
    }

    /// A cache-dir attacker who replaces the cached archive with a FIFO
    /// must not be able to hang `get_verified_archive` waiting for a
    /// writer to connect. The lookup has to come back as a miss within the
    /// time budget and leave nothing staged in the caller-private
    /// directory, so the caller can fall back to a fresh verified download.
    #[cfg(unix)]
    #[test]
    fn test_fifo_cached_archive_does_not_hang() {
        use std::os::unix::ffi::OsStrExt;
        use std::time::{Duration, Instant};
        let tmp = TempDir::new().unwrap();
        let cache = cache_with_test_key(tmp.path());
        let pd = priv_dir();

        // Plant a legit signed entry so meta/version/sig-size are good,
        // then replace the cached archive with a FIFO. Opening a FIFO for
        // reading in blocking mode would wait until a writer connected.
        let (archive, sig) = make_signed_archive(tmp.path(), b"legit");
        cache.store_archive("6.0.0", &archive, &sig).unwrap();
        let cached_archive = cache.cached_archive_path("6.0.0");
        fs::remove_file(&cached_archive).unwrap();

        // Use libc::mkfifo directly so a CI image that drops coreutils
        // can't silently skip this test (an earlier shell-out version
        // would hide a packaging regression).
        //
        // The path bytes come from `OsStrExt::as_bytes`, the specified Unix
        // conversion for FFI; `OsStr::as_encoded_bytes` is documented as an
        // unspecified encoding and is not meant to cross an FFI boundary.
        let cstr = std::ffi::CString::new(cached_archive.as_os_str().as_bytes()).unwrap();
        // SAFETY: `cstr` is a valid NUL-terminated C string that stays alive
        // for the whole call, and `mkfifo(2)` only reads the path. It
        // returns 0 on success and -1 on error with errno set.
        #[allow(unsafe_code)]
        let rc = unsafe { libc::mkfifo(cstr.as_ptr(), 0o600) };
        assert_eq!(rc, 0, "mkfifo failed: {}", std::io::Error::last_os_error());

        // Measure only the cache-hit path so cold-process startup or
        // unrelated test parallelism don't blow the budget.
        let start = Instant::now();
        let got = cache.get_verified_archive("6.0.0", pd.path());
        let elapsed = start.elapsed();

        assert!(
            got.is_none(),
            "a FIFO planted at the cached archive path must be rejected"
        );
        // 5s gives generous headroom on a contended CI macOS runner
        // while still catching a real "open is blocking on the FIFO".
        assert!(
            elapsed < Duration::from_secs(5),
            "open of FIFO returned in {elapsed:?}, expected ≪ 5s — \
             pre-check or O_NONBLOCK is not catching this"
        );
        // Nothing should have been staged.
        assert!(!pd.path().join("cached-6.0.0.archive").exists());
    }

    /// Replacing `.meta.json` with a symlink to a special file
    /// (`/dev/zero`) must make `get_verified_archive` report a miss.
    #[cfg(unix)]
    #[test]
    fn test_meta_symlink_to_special_file_is_rejected() {
        let cache_root = TempDir::new().unwrap();
        let cache = cache_with_test_key(cache_root.path());
        let staging = priv_dir();

        let (src_archive, src_sig) = make_signed_archive(cache_root.path(), b"legit");
        cache.store_archive("5.1.0", &src_archive, &src_sig).unwrap();

        // Swap the real metadata file for a symlink to /dev/zero.
        let cached_meta = cache.meta_path("5.1.0");
        fs::remove_file(&cached_meta).unwrap();
        std::os::unix::fs::symlink("/dev/zero", &cached_meta).unwrap();

        let lookup = cache.get_verified_archive("5.1.0", staging.path());
        assert!(
            lookup.is_none(),
            "metadata symlink to a special file must be rejected"
        );
    }
}