Skip to main content

treeship_core/keys/
mod.rs

1use std::{
2    collections::HashMap,
3    fs,
4    io::{self, Read, Write},
5    path::{Path, PathBuf},
6    sync::{Arc, RwLock},
7};
8
9use aes_gcm::{
10    aead::{Aead, KeyInit, OsRng as AeadOsRng, Payload},
11    AeadCore, Aes256Gcm, Key as AesKey, Nonce,
12};
13use rand::{rngs::OsRng, RngCore};
14use serde::{Deserialize, Serialize};
15use sha2::{Digest as Sha2Digest, Sha256};
16use zeroize::Zeroizing;
17
18use crate::attestation::{Ed25519Signer, Signer};
19
20// --- Public types ---
21
22pub type KeyId = String;
23
24/// Public information about a stored key. Never contains private material.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct KeyInfo {
27    pub id:          KeyId,
28    pub algorithm:   String,   // "ed25519"
29    pub is_default:  bool,
30    pub created_at:  String,   // RFC 3339
31    /// First 8 bytes of sha256(public_key), hex-encoded.
32    pub fingerprint: String,
33    pub public_key:  Vec<u8>,  // raw 32-byte Ed25519 public key
34    /// RFC 3339 timestamp after which signatures by this key should be
35    /// considered stale. `None` means the key has not been rotated and is
36    /// indefinitely valid. Set automatically by `Store::rotate` to
37    /// `now + grace_period` on the predecessor key.
38    #[serde(default, skip_serializing_if = "Option::is_none")]
39    pub valid_until: Option<String>,
40    /// If this key was rotated to a successor, the successor's key id.
41    /// Lets verifiers walk a rotation chain forward when validating an old
42    /// receipt against the current keystore. `None` means this is the head
43    /// of its chain.
44    #[serde(default, skip_serializing_if = "Option::is_none")]
45    pub successor_key_id: Option<KeyId>,
46}
47
48/// Outcome of a `Store::rotate` call.
49#[derive(Debug, Clone)]
50pub struct RotationResult {
51    /// The key that was rotated. Its `valid_until` is now set.
52    pub predecessor: KeyInfo,
53    /// The freshly minted successor key.
54    pub successor: KeyInfo,
55    /// RFC 3339 timestamp until which the predecessor remains valid for
56    /// signature verification under the grace period. Equal to
57    /// `predecessor.valid_until.unwrap()`.
58    pub grace_period_until: String,
59}
60
61/// Errors from keystore operations.
62#[derive(Debug)]
63pub enum KeyError {
64    Io(io::Error),
65    Json(serde_json::Error),
66    Crypto(String),
67    NotFound(KeyId),
68    EmptyKeyId,
69    NoDefaultKey,
70    /// Private key file has insecure permissions (group- or world-readable).
71    /// Carries the path and the observed octal mode so the caller can show
72    /// an actionable error. Set `TREESHIP_ALLOW_INSECURE_KEY_PERMS=1` to
73    /// bypass during testing or controlled environments.
74    InsecureKeyPerms { path: PathBuf, mode: u32 },
75}
76
77impl std::fmt::Display for KeyError {
78    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79        match self {
80            Self::Io(e)       => write!(f, "keys io: {}", e),
81            Self::Json(e)     => write!(f, "keys json: {}", e),
82            Self::Crypto(e)   => write!(f, "keys crypto: {}", e),
83            Self::NotFound(k) => write!(f, "key not found: {}", k),
84            Self::EmptyKeyId  => write!(f, "key id must not be empty"),
85            Self::NoDefaultKey => write!(f, "no default key — run treeship init"),
86            Self::InsecureKeyPerms { path, mode } => write!(
87                f,
88                "private key {} has insecure permissions (mode {:o}); \
89                 run `treeship doctor --fix` or chmod 600 the file. \
90                 Set TREESHIP_ALLOW_INSECURE_KEY_PERMS=1 to bypass.",
91                path.display(),
92                mode & 0o777,
93            ),
94        }
95    }
96}
97
98impl std::error::Error for KeyError {}
99impl From<io::Error>          for KeyError { fn from(e: io::Error)          -> Self { Self::Io(e) } }
100impl From<serde_json::Error>  for KeyError { fn from(e: serde_json::Error)  -> Self { Self::Json(e) } }
101
102// --- On-disk formats ---
103
104/// The encrypted representation of one keypair on disk.
105#[derive(Serialize, Deserialize, Clone)]
106struct EncryptedEntry {
107    id:           KeyId,
108    algorithm:    String,
109    created_at:   String,
110    public_key:   Vec<u8>,
111    /// AES-256-GCM ciphertext of the 32-byte Ed25519 secret scalar.
112    enc_priv_key: Vec<u8>,
113    /// 12-byte GCM nonce used when encrypting.
114    nonce:        Vec<u8>,
115    /// RFC 3339 timestamp after which signatures by this key should be
116    /// considered stale. `None` means the key is indefinitely valid.
117    /// Defaulted on deserialization so pre-0.9.5 entry files still load.
118    #[serde(default, skip_serializing_if = "Option::is_none")]
119    valid_until: Option<String>,
120    /// Successor key id if this key was rotated. Defaulted on
121    /// deserialization for pre-0.9.5 entry files.
122    #[serde(default, skip_serializing_if = "Option::is_none")]
123    successor_key_id: Option<KeyId>,
124}
125
126/// The manifest file: which keys exist and which is the default.
127#[derive(Serialize, Deserialize, Default)]
128struct Manifest {
129    default_key_id: Option<KeyId>,
130    key_ids:        Vec<KeyId>,
131}
132
133// --- Store ---
134
135/// Local encrypted keystore.
136///
137/// Private keys are encrypted with AES-256-GCM (RustCrypto `aes-gcm`
138/// 0.10) before writing to disk. The encryption key is derived from a
139/// machine-specific secret so key files are useless if copied to
140/// another machine.
141///
142/// Pre-v0.10.3 keystores used a homemade SHA-256-CTR + HMAC-SHA-256
143/// construction (TS-2026-001) and are transparently migrated to the
144/// new AEAD format on first decrypt; see `encrypt_for_disk_v2` /
145/// `decrypt_from_disk` for the format dispatcher.
146///
147/// A future version will delegate to OS credential stores (Secure
148/// Enclave / TPM 2.0).
149pub struct Store {
150    dir:         PathBuf,
151    machine_key: [u8; 32],
152    /// In-memory cache — avoids disk reads on hot paths.
153    cache:       Arc<RwLock<HashMap<KeyId, EncryptedEntry>>>,
154}
155
156impl Store {
157    /// Opens or creates a keystore at `dir`.
158    pub fn open(dir: impl AsRef<Path>) -> Result<Self, KeyError> {
159        let dir = dir.as_ref().to_path_buf();
160        fs::create_dir_all(&dir)?;
161
162        let machine_key = derive_machine_key(&dir)?;
163
164        Ok(Self {
165            dir,
166            machine_key,
167            cache: Arc::new(RwLock::new(HashMap::new())),
168        })
169    }
170
171    /// Generates a new Ed25519 keypair, encrypts and stores it.
172    /// If `set_default` is true (or there is no current default), makes
173    /// this key the default signing key.
174    pub fn generate(&self, set_default: bool) -> Result<KeyInfo, KeyError> {
175        let key_id = new_key_id();
176
177        let signer = Ed25519Signer::generate(&key_id)
178            .map_err(|e| KeyError::Crypto(e.to_string()))?;
179
180        // `secret` is a Zeroizing<[u8; 32]> -- the caller-side copy of the
181        // signer's secret scalar is wiped on scope exit. `signer` is dropped
182        // at end of fn, which wipes its own copy via the Drop impl in
183        // attestation::signer.
184        let secret  = signer.secret_bytes();
185        let pub_key = signer.public_key_bytes();
186
187        let enc = encrypt_for_disk_v2(&self.machine_key, key_id.as_str(), &pub_key, secret.as_slice())
188            .map_err(KeyError::Crypto)?;
189
190        let entry = EncryptedEntry {
191            id:               key_id.clone(),
192            algorithm:        "ed25519".into(),
193            created_at:       crate::statements::unix_to_rfc3339(unix_now()),
194            public_key:       pub_key.clone(),
195            enc_priv_key:     enc,
196            // v2 ciphertexts carry their nonce inline (bytes [2..14]).
197            // The separate `nonce` field is retained for v1 legacy
198            // compatibility; for fresh v2 entries we serialize an empty
199            // vec so the JSON stays well-formed.
200            nonce:            Vec::new(),
201            valid_until:      None,
202            successor_key_id: None,
203        };
204
205        self.write_entry(&entry)?;
206
207        // Update manifest.
208        let mut manifest = self.read_manifest()?;
209        manifest.key_ids.push(key_id.clone());
210        if set_default || manifest.default_key_id.is_none() {
211            manifest.default_key_id = Some(key_id.clone());
212        }
213        self.write_manifest(&manifest)?;
214
215        // Populate cache.
216        self.cache.write().unwrap().insert(key_id.clone(), entry);
217
218        Ok(KeyInfo {
219            id:               key_id.clone(),
220            algorithm:        "ed25519".into(),
221            is_default:       manifest.default_key_id.as_deref() == Some(key_id.as_str()),
222            created_at:       crate::statements::unix_to_rfc3339(unix_now()),
223            fingerprint:      fingerprint(&pub_key),
224            public_key:       pub_key,
225            valid_until:      None,
226            successor_key_id: None,
227        })
228    }
229
230    /// Rotate the current default key (or a specific key) to a freshly
231    /// generated successor.
232    ///
233    /// Mints a new Ed25519 keypair, links the predecessor to it via
234    /// `successor_key_id`, and stamps the predecessor with a `valid_until`
235    /// of `now + grace_period`. The grace window lets verifiers continue to
236    /// accept signatures from the predecessor while clients catch up to
237    /// the new public key.
238    ///
239    /// If `set_default` is true (the typical case -- you rotate because you
240    /// want to start signing with the new key immediately), the successor
241    /// becomes the default. Pass `false` to stage a rotation for review
242    /// without flipping the active signer.
243    ///
244    /// `predecessor_id` may be `None` to rotate the current default. Pass
245    /// an explicit id to rotate a non-default key (e.g. a per-environment
246    /// secondary).
247    ///
248    /// Note on threat model: this is a graceful rotation primitive, not a
249    /// revocation primitive. If the predecessor key is suspected compromised
250    /// the grace_period should be `Duration::ZERO` (or use a future
251    /// `revoke()` call once that lands) so the predecessor's `valid_until`
252    /// is in the past and any verifier honoring the metadata refuses
253    /// further signatures from it.
254    pub fn rotate(
255        &self,
256        predecessor_id: Option<&str>,
257        grace_period: std::time::Duration,
258        set_default: bool,
259    ) -> Result<RotationResult, KeyError> {
260        // Resolve predecessor: explicit id, else the current default.
261        let pred_id = match predecessor_id {
262            Some(id) => id.to_string(),
263            None => self.default_key_id()?,
264        };
265
266        // Refuse to rotate a key that has already been rotated -- the
267        // chain head is the only valid rotation source. This makes the
268        // operation idempotent in the face of accidental re-runs.
269        let pred_entry_existing = self.load_entry(&pred_id)?;
270        if let Some(existing) = &pred_entry_existing.successor_key_id {
271            return Err(KeyError::Crypto(format!(
272                "key {pred_id} has already been rotated to {existing}; \
273                 rotate the chain head instead"
274            )));
275        }
276
277        // Mint the successor. We deliberately do NOT call `self.generate()`
278        // because that path also updates the manifest's default. We need a
279        // single transactional update that sets both predecessor metadata
280        // AND (optionally) the new default in one manifest write.
281        let succ_id = new_key_id();
282        let signer = Ed25519Signer::generate(&succ_id)
283            .map_err(|e| KeyError::Crypto(e.to_string()))?;
284        // `succ_secret` is a Zeroizing<[u8; 32]>; the caller-side copy is
285        // wiped on scope exit, and `signer` is dropped at end of fn (which
286        // wipes its own copy via the attestation::signer Drop impl).
287        let succ_secret  = signer.secret_bytes();
288        let succ_pub_key = signer.public_key_bytes();
289        let succ_enc =
290            encrypt_for_disk_v2(&self.machine_key, succ_id.as_str(), &succ_pub_key, succ_secret.as_slice())
291                .map_err(KeyError::Crypto)?;
292
293        let succ_created = crate::statements::unix_to_rfc3339(unix_now());
294        let succ_entry = EncryptedEntry {
295            id:               succ_id.clone(),
296            algorithm:        "ed25519".into(),
297            created_at:       succ_created.clone(),
298            public_key:       succ_pub_key.clone(),
299            enc_priv_key:     succ_enc,
300            // v2 ciphertexts carry their nonce inline; the legacy
301            // `nonce` field is left empty for fresh writes.
302            nonce:            Vec::new(),
303            valid_until:      None,
304            successor_key_id: None,
305        };
306
307        // Stamp the predecessor with the grace deadline and link forward.
308        let valid_until = crate::statements::unix_to_rfc3339(
309            unix_now() + grace_period.as_secs(),
310        );
311        let mut pred_entry = pred_entry_existing;
312        pred_entry.valid_until      = Some(valid_until.clone());
313        pred_entry.successor_key_id = Some(succ_id.clone());
314
315        // Write order matters for partial-failure recovery. Persist the
316        // successor entry FIRST, then stamp the predecessor pointing at
317        // it. If we wrote the predecessor first and then the successor
318        // write failed, the predecessor's successor_key_id would dangle
319        // at a key that doesn't exist on disk -- and the
320        // already-been-rotated guard would refuse to retry. With this
321        // order:
322        //   - successor write fails: nothing observable changed; retry clean.
323        //   - predecessor write fails: orphan successor key file on disk
324        //     (not yet referenced by manifest or by any other key); retry
325        //     generates a new successor and the orphan is harmless.
326        //   - manifest write fails: predecessor + successor both on disk,
327        //     manifest stale; retry's already-rotated guard catches the
328        //     half-finished state and surfaces a clear error.
329        self.write_entry(&succ_entry)?;
330        self.write_entry(&pred_entry)?;
331
332        // Refresh the cache to mirror the on-disk state we just wrote --
333        // BEFORE the manifest update. If the manifest write fails, the
334        // cache must still match disk so a same-process retry sees the
335        // half-rotated state and the already-rotated guard fires
336        // correctly. Doing this AFTER write_manifest would leave a
337        // window where disk reflects the rotation but the in-memory
338        // cache still serves the unstamped predecessor, and a retry
339        // from the same Store instance would generate a duplicate
340        // successor -- defeating the whole point of the guard.
341        {
342            let mut cache = self.cache.write().unwrap();
343            cache.insert(pred_entry.id.clone(), pred_entry.clone());
344            cache.insert(succ_id.clone(),       succ_entry.clone());
345        }
346
347        // Update the manifest: register the new key, optionally promote it.
348        let mut manifest = self.read_manifest()?;
349        manifest.key_ids.push(succ_id.clone());
350        if set_default {
351            manifest.default_key_id = Some(succ_id.clone());
352        }
353        self.write_manifest(&manifest)?;
354
355        let default_id = manifest.default_key_id.clone();
356        let predecessor = KeyInfo {
357            id:               pred_entry.id.clone(),
358            algorithm:        pred_entry.algorithm.clone(),
359            is_default:       default_id.as_deref() == Some(pred_entry.id.as_str()),
360            created_at:       pred_entry.created_at.clone(),
361            fingerprint:      fingerprint(&pred_entry.public_key),
362            public_key:       pred_entry.public_key.clone(),
363            valid_until:      pred_entry.valid_until.clone(),
364            successor_key_id: pred_entry.successor_key_id.clone(),
365        };
366        let successor = KeyInfo {
367            id:               succ_id.clone(),
368            algorithm:        "ed25519".into(),
369            is_default:       default_id.as_deref() == Some(succ_id.as_str()),
370            created_at:       succ_created,
371            fingerprint:      fingerprint(&succ_pub_key),
372            public_key:       succ_pub_key,
373            valid_until:      None,
374            successor_key_id: None,
375        };
376
377        Ok(RotationResult {
378            predecessor,
379            successor,
380            grace_period_until: valid_until,
381        })
382    }
383
384    /// Walk the rotation chain forward from `id`, returning the ordered
385    /// list of key ids: `[id, successor_of_id, ...]`. The first element is
386    /// always `id` itself. Stops at a key with no `successor_key_id`.
387    pub fn successor_chain(&self, id: &str) -> Result<Vec<KeyId>, KeyError> {
388        let mut chain = Vec::new();
389        let mut cursor = id.to_string();
390        // Cap iterations at the manifest size to defend against a corrupt
391        // chain that loops back on itself. A well-formed chain is bounded
392        // by the number of keys in the keystore.
393        let max_steps = self.read_manifest()?.key_ids.len() + 1;
394        for _ in 0..max_steps {
395            chain.push(cursor.clone());
396            let entry = self.load_entry(&cursor)?;
397            match entry.successor_key_id {
398                Some(next) => cursor = next,
399                None => return Ok(chain),
400            }
401        }
402        Err(KeyError::Crypto(format!(
403            "rotation chain starting at {id} exceeds keystore size; suspected loop"
404        )))
405    }
406
407    /// Returns the `KeyInfo` for every key whose `valid_until` is either
408    /// unset or strictly after `at_unix_secs`. The result includes both
409    /// rotated-but-still-in-grace predecessors and never-rotated keys.
410    /// Useful for building a verifier's accept-set as of a given time.
411    pub fn valid_keys_at(&self, at_unix_secs: u64) -> Result<Vec<KeyInfo>, KeyError> {
412        let cutoff_rfc = crate::statements::unix_to_rfc3339(at_unix_secs);
413        Ok(self.list()?
414            .into_iter()
415            .filter(|k| match &k.valid_until {
416                None => true,
417                Some(until) => until.as_str() > cutoff_rfc.as_str(),
418            })
419            .collect())
420    }
421
422    /// Returns a boxed `Signer` for the current default key.
423    pub fn default_signer(&self) -> Result<Box<dyn Signer>, KeyError> {
424        let manifest = self.read_manifest()?;
425        let id = manifest.default_key_id.ok_or(KeyError::NoDefaultKey)?;
426        self.signer(&id)
427    }
428
    /// Returns a boxed `Signer` for a specific key ID.
    ///
    /// Refuses to load if the on-disk key file has insecure permissions
    /// (any group or world bits). This is the choke point for *all*
    /// signing — public-key reads and successor lookups go through
    /// `load_entry` / `read_entry` and are not affected.
    ///
    /// Bypass with `TREESHIP_ALLOW_INSECURE_KEY_PERMS=1` for controlled
    /// environments (CI sandboxes, recovery flows). The bypass should
    /// not be set in normal operation.
    ///
    /// TOCTOU note: the perm-check and the ciphertext read run against
    /// the SAME file descriptor (open once, fstat, then read from that
    /// fd). The previous shape — `check_key_file_perms(path)` followed
    /// by `load_entry(id)` (which called `fs::read(path)`) — opened the
    /// file twice. An attacker with write access to `~/.treeship/keys/`
    /// could swap the file between the two opens: first present an
    /// owner-only file to pass the perm gate, then replace it with a
    /// different (loose-perm) file containing an attacker-controlled
    /// scalar before the second `open`. The single-fd shape closes that
    /// window because the inode is pinned by the open file descriptor;
    /// path-level swaps after the open don't affect what we read. This
    /// matches the pattern in `session/event_log.rs::open_lock_file`.
    ///
    /// Side effect: an entry still stored in the legacy v1 format is
    /// re-encrypted to v2 and rewritten on disk (best-effort) as part of
    /// this call.
    ///
    /// # Errors
    ///
    /// Propagates whatever `read_entry_with_perm_check` returns, and
    /// returns `KeyError::Crypto` when decryption fails (enriched by
    /// `enrich_crypto_error`), when the decrypted secret is not 32 bytes,
    /// or when the signer cannot be built from the secret. A failed
    /// migration is NOT an error; it is only reported on stderr.
    pub fn signer(&self, id: &str) -> Result<Box<dyn Signer>, KeyError> {
        // Always read from disk through the permission gate; the
        // in-memory cache is deliberately not consulted on this path.
        let entry = self.read_entry_with_perm_check(id)?;

        // Dispatcher: v2 ciphertexts start with magic 0x54, version 0x02
        // and use real AES-256-GCM. Older entries fall through to the
        // legacy SHA-256-CTR+HMAC path (`decrypt_legacy_v1`) and are
        // transparently re-encrypted in the new format below.
        let was_legacy = is_legacy_v1(&entry.enc_priv_key);
        let secret = decrypt_from_disk(
            &self.machine_key,
            &entry.id,
            &entry.public_key,
            &entry.enc_priv_key,
            &entry.nonce,
        )
            .map_err(|e| self.enrich_crypto_error(e))?;

        // L3: wrap the on-stack copy of the decrypted secret in a
        // `Zeroizing` so the byte buffer is wiped on drop. `secret`
        // itself is already a `Zeroizing<Vec<u8>>` returned by
        // `decrypt_from_disk`, but `try_into::<[u8; 32]>` produces an
        // independent stack-allocated array that the Vec's Drop will
        // not cover. Without this wrapper, returning from `signer()`
        // would leave the secret scalar in stale stack memory until
        // a future stack frame happens to overwrite it.
        let secret_arr: Zeroizing<[u8; 32]> = Zeroizing::new(
            secret.as_slice().try_into()
                .map_err(|_| KeyError::Crypto("decrypted key is wrong length".into()))?
        );

        // Transparent migration: if this entry was still in the legacy
        // v1 format (the broken SHA-256-CTR construction from
        // TS-2026-001), re-encrypt it with v2 AES-256-GCM and rewrite
        // the file. We do this best-effort -- a migration failure here
        // must NOT block signing for the current call, since the
        // in-memory secret is already valid. The next decrypt on a
        // fresh process will retry.
        if was_legacy {
            if let Err(e) = self.migrate_entry_to_v2(&entry, &secret_arr) {
                // Surface the failure as a tracing-style stderr note
                // rather than an error -- the user's signing flow is
                // unaffected, and we'd rather them know about it than
                // wedge the call.
                eprintln!(
                    "treeship: keystore entry {} could not be migrated \
                     from legacy v1 format to v2 ({}); will retry next \
                     load",
                    entry.id, e
                );
            }
        }

        // Build the signer from the zeroizing array; `secret_arr` is
        // wiped when it goes out of scope at the end of this function.
        let signer = Ed25519Signer::from_bytes(&entry.id, &secret_arr)
            .map_err(|e| KeyError::Crypto(e.to_string()))?;

        Ok(Box::new(signer))
    }
509
    /// Re-encrypt a legacy v1 entry with the new v2 AEAD and persist
    /// it. Updates the in-memory cache so subsequent loads in the same
    /// process see the migrated entry. Idempotent; safe to invoke
    /// concurrently because the migration is serialized by a per-entry
    /// advisory lock on `<entry>.migrate.lock` (TS-2026-001 H3).
    ///
    /// We lock a *sentinel* file rather than the entry file itself,
    /// because the entry file is renamed-into-place during the atomic
    /// write inside `write_entry`. Holding a flock on the entry's inode
    /// while a sibling process renames a new inode into its path is
    /// nonsensical (the lock would survive on the now-orphaned inode);
    /// the sentinel sidecar has a stable identity for the whole
    /// migration window.
    ///
    /// Same blocking-flock pattern as `packages/core/src/session/event_log.rs`
    /// (Lane F): exclusive lock, then a same-thread re-read to settle
    /// "did a peer already migrate while I was waiting?" cleanly.
    ///
    /// Parameters:
    /// * `old_entry` -- the legacy entry as the caller read it; its id,
    ///   public key and rotation metadata are copied into the rewritten
    ///   entry.
    /// * `secret` -- the already-decrypted 32-byte secret to re-encrypt.
    ///
    /// # Errors
    ///
    /// `KeyError::Io` if the sentinel file cannot be opened or locked,
    /// `KeyError::Crypto` if v2 encryption fails, and any error from
    /// `write_entry`. Cache refresh and sentinel cleanup failures are
    /// ignored.
    fn migrate_entry_to_v2(
        &self,
        old_entry: &EncryptedEntry,
        secret: &[u8; 32],
    ) -> Result<(), KeyError> {
        // The sentinel sits next to the entry file, with the entry path's
        // extension replaced by `migrate.lock`.
        let entry_path = self.entry_path(&old_entry.id);
        let lock_path = entry_path.with_extension("migrate.lock");

        // Open (or create) the sentinel lock file with restrictive perms
        // and take an exclusive flock. We intentionally use the blocking
        // `lock_exclusive` -- not `try_lock_exclusive` -- because the
        // migration window is short (a single AEAD encrypt + atomic
        // rename) and the worst case under contention is one writer
        // serialized behind another. Pulling the
        // try-with-bounded-retry pattern in here would buy us nothing:
        // the second writer's re-read after the lock releases would
        // observe the now-v2 entry and short-circuit.
        let lock_file = open_migration_lock_file(&lock_path)
            .map_err(KeyError::Io)?;

        // The lock is only taken on non-wasm targets; on wasm this block
        // is compiled out and the migration proceeds without a flock.
        #[cfg(not(target_family = "wasm"))]
        {
            use fs2::FileExt;
            lock_file.lock_exclusive().map_err(KeyError::Io)?;
        }

        // Under the lock: did a peer already complete the migration
        // while we were waiting? If so, our work is done -- we must
        // NOT rewrite, because we'd overwrite a peer's freshly-migrated
        // v2 ciphertext with our own (semantically equivalent, but
        // unnecessary I/O and an unnecessary cache update).
        // A failed re-read is not treated as an error: the code simply
        // falls through and attempts the migration.
        if let Ok(current) = self.read_entry(&old_entry.id) {
            if !is_legacy_v1(&current.enc_priv_key) {
                // Peer already migrated. Refresh the cache so subsequent
                // loads in this process see the v2 entry rather than
                // the stale legacy copy our caller passed in.
                if let Ok(mut cache) = self.cache.write() {
                    cache.insert(current.id.clone(), current);
                }
                // Lock drops at function exit; sentinel file remains on
                // disk as a harmless inode (no migration data, idempotent
                // for future invocations).
                return Ok(());
            }
        }

        let new_ciphertext = encrypt_for_disk_v2(
            &self.machine_key,
            &old_entry.id,
            &old_entry.public_key,
            secret,
        )
        .map_err(KeyError::Crypto)?;

        // NOTE(review): the rewritten entry takes `valid_until` and
        // `successor_key_id` from the caller's `old_entry`, not from the
        // copy re-read under the lock above. If another process can stamp
        // rotation metadata between the caller's read and this write,
        // that metadata would be overwritten -- confirm whether rotation
        // needs to be serialized with this lock.
        let migrated = EncryptedEntry {
            id:               old_entry.id.clone(),
            algorithm:        old_entry.algorithm.clone(),
            created_at:       old_entry.created_at.clone(),
            public_key:       old_entry.public_key.clone(),
            enc_priv_key:     new_ciphertext,
            // v2 carries the nonce inline; clear the legacy field.
            nonce:            Vec::new(),
            valid_until:      old_entry.valid_until.clone(),
            successor_key_id: old_entry.successor_key_id.clone(),
        };

        // Persist first, then refresh the cache (best-effort) so the
        // cache never gets ahead of what is on disk.
        self.write_entry(&migrated)?;
        if let Ok(mut cache) = self.cache.write() {
            cache.insert(migrated.id.clone(), migrated);
        }

        // Best-effort cleanup of the sentinel lock file. We hold the
        // lock until function exit (drop), so by the time we reach
        // here it is safe to unlink the inode -- future migrations
        // for this entry will succeed via the early-return path
        // because the entry is now v2. Leaving the sentinel behind is
        // also harmless; on Unix removing a flocked file is allowed
        // and the lock is released on fd drop regardless.
        let _ = std::fs::remove_file(&lock_path);

        // Keep the lock_file binding alive to function exit so the
        // flock is held across write_entry + remove_file. Explicit
        // drop makes the intent obvious to readers.
        drop(lock_file);
        Ok(())
    }
613
614    /// Wrap a bare crypto error (typically "MAC verification failed ..." from
615    /// the AES-GCM decrypt path) with a diagnostic and an actionable recovery
616    /// path.
617    ///
618    /// The common failure mode in the wild is a pre-0.9.x keystore whose
619    /// machine-key derivation was seed-file-based. Later versions derive
620    /// the machine key from hostname+username (macOS) or /etc/machine-id
621    /// (Linux), so old ciphertexts can't be MAC-verified with the new key.
622    /// Detecting that case is best-effort: the presence of a legacy seed
623    /// file (`.machineseed` or `machine_seed` inside the keys dir) is a
624    /// strong hint. If we see one, call it out explicitly.
625    fn enrich_crypto_error(&self, raw: String) -> KeyError {
626        // Only enrich on MAC failures -- other errors (I/O, wrong length) are
627        // surfaced as-is because their remediation differs.
628        if !raw.contains("MAC verification failed") {
629            return KeyError::Crypto(raw);
630        }
631
632        let legacy_seed_dot = self.dir.join(".machineseed");
633        let legacy_seed     = self.dir.join("machine_seed");
634        let has_legacy_seed = legacy_seed_dot.exists() || legacy_seed.exists();
635
636        let diagnosis = if has_legacy_seed {
637            "your keystore was created by an older Treeship version whose \
638             machine-key derivation has since changed. The ciphertext is \
639             intact but cannot be decrypted under the current derivation."
640        } else {
641            "the keystore cannot be decrypted. Usual causes: the key file \
642             was copied from a different machine, the hostname or username \
643             changed, or the file was corrupted."
644        };
645
646        // Resolve the user's ~/.treeship path for the recovery command, so
647        // we give a copy-pasteable command rather than a generic instruction.
648        let ts_dir = std::env::var("HOME")
649            .map(|h| format!("{h}/.treeship"))
650            .unwrap_or_else(|_| "~/.treeship".into());
651
652        // The outer KeyError::Crypto Display impl already prepends
653        // "keys crypto: "; don't double it. Start with the raw MAC error
654        // so the user still sees the underlying cryptographic reason,
655        // then follow with the human-readable diagnosis and recovery.
656        let msg = format!(
657            "{raw}\n\n  \
658             Diagnosis: {diagnosis}\n\n  \
659             Recovery (nondestructive -- the old keystore is moved aside, \
660             not deleted; any sealed .treeship packages you produced remain \
661             verifiable since their receipts embed the old public key):\n\n    \
662             mv {ts_dir} {ts_dir}.bak.$(date +%s)\n    \
663             treeship init\n"
664        );
665
666        KeyError::Crypto(msg)
667    }
668
669    /// Returns the default key ID.
670    pub fn default_key_id(&self) -> Result<KeyId, KeyError> {
671        self.read_manifest()?
672            .default_key_id
673            .ok_or(KeyError::NoDefaultKey)
674    }
675
676    /// Lists all keys.
677    pub fn list(&self) -> Result<Vec<KeyInfo>, KeyError> {
678        let manifest = self.read_manifest()?;
679        let default  = manifest.default_key_id.as_deref().unwrap_or("");
680
681        manifest.key_ids.iter().map(|id| {
682            let entry = self.load_entry(id)?;
683            Ok(KeyInfo {
684                id:               entry.id.clone(),
685                algorithm:        entry.algorithm.clone(),
686                is_default:       entry.id == default,
687                created_at:       entry.created_at.clone(),
688                fingerprint:      fingerprint(&entry.public_key),
689                public_key:       entry.public_key.clone(),
690                valid_until:      entry.valid_until.clone(),
691                successor_key_id: entry.successor_key_id.clone(),
692            })
693        }).collect()
694    }
695
696    /// Sets the default signing key.
697    pub fn set_default(&self, id: &str) -> Result<(), KeyError> {
698        // Verify the key exists before updating the manifest.
699        self.load_entry(id)?;
700        let mut manifest = self.read_manifest()?;
701        manifest.default_key_id = Some(id.to_string());
702        self.write_manifest(&manifest)
703    }
704
705    /// Returns the public key bytes for a key ID.
706    pub fn public_key(&self, id: &str) -> Result<Vec<u8>, KeyError> {
707        Ok(self.load_entry(id)?.public_key)
708    }
709
710    // --- private ---
711
712    fn load_entry(&self, id: &str) -> Result<EncryptedEntry, KeyError> {
713        // Check cache first.
714        if let Ok(cache) = self.cache.read() {
715            if let Some(entry) = cache.get(id) {
716                return Ok(entry.clone());
717            }
718        }
719        self.read_entry(id)
720    }
721
722    fn entry_path(&self, id: &str) -> PathBuf {
723        self.dir.join(format!("{}.json", id))
724    }
725
726    fn write_entry(&self, entry: &EncryptedEntry) -> Result<(), KeyError> {
727        let path = self.entry_path(&entry.id);
728        let json = serde_json::to_vec_pretty(entry)?;
729        write_file_600(&path, &json)?;
730        Ok(())
731    }
732
733    fn read_entry(&self, id: &str) -> Result<EncryptedEntry, KeyError> {
734        let path = self.entry_path(id);
735        if !path.exists() {
736            return Err(KeyError::NotFound(id.to_string()));
737        }
738        let bytes = fs::read(&path)?;
739        let entry: EncryptedEntry = serde_json::from_slice(&bytes)?;
740        Ok(entry)
741    }
742
743    /// Single-open, race-free counterpart to `read_entry` for the
744    /// signing path. Opens the key file ONCE, fstat's the file
745    /// descriptor to check perms, then reads the JSON from the SAME
746    /// descriptor. The path is never re-resolved after the open, so an
747    /// attacker who swaps `<id>.json` on disk between the perm check
748    /// and the ciphertext read cannot influence the bytes we decrypt.
749    ///
750    /// Cache: this path intentionally skips the in-memory entry cache.
751    /// The cache is read-mostly and seeded by `load_entry`, which is
752    /// fine for public-key lookups but defeats the perm gate (a cached
753    /// entry would let `signer()` return without ever consulting the
754    /// on-disk perms). The signing path is rare enough that the extra
755    /// disk read is not a hot spot.
756    fn read_entry_with_perm_check(&self, id: &str) -> Result<EncryptedEntry, KeyError> {
757        let path = self.entry_path(id);
758
759        // Open once. NotFound surfaces as `KeyError::NotFound` to
760        // match the legacy `read_entry` shape; any other I/O error
761        // (permission denied at the *open* layer, EIO, etc.)
762        // propagates via the `From<io::Error>` impl.
763        let mut file = match fs::File::open(&path) {
764            Ok(f) => f,
765            Err(e) if e.kind() == io::ErrorKind::NotFound => {
766                return Err(KeyError::NotFound(id.to_string()));
767            }
768            Err(e) => return Err(KeyError::Io(e)),
769        };
770
771        // Perm check on the open fd. On Unix `File::metadata` is
772        // documented to call `fstat` on the underlying fd, which pins
773        // the inode -- a subsequent path swap on disk cannot change
774        // what we see. The bypass env var continues to short-circuit.
775        check_open_key_file_perms(&path, &file)?;
776
777        // Read the full ciphertext envelope from the same fd.
778        let mut bytes = Vec::new();
779        file.read_to_end(&mut bytes)?;
780
781        let entry: EncryptedEntry = serde_json::from_slice(&bytes)?;
782        Ok(entry)
783    }
784
785    fn manifest_path(&self) -> PathBuf {
786        self.dir.join("manifest.json")
787    }
788
789    fn read_manifest(&self) -> Result<Manifest, KeyError> {
790        let path = self.manifest_path();
791        if !path.exists() {
792            return Ok(Manifest::default());
793        }
794        let bytes = fs::read(&path)?;
795        Ok(serde_json::from_slice(&bytes)?)
796    }
797
798    fn write_manifest(&self, m: &Manifest) -> Result<(), KeyError> {
799        let json = serde_json::to_vec_pretty(m)?;
800        write_file_600(&self.manifest_path(), &json)?;
801        Ok(())
802    }
803}
804
805// --- Crypto helpers ---
806//
807// AEAD choice: AES-256-GCM via the RustCrypto `aes-gcm` 0.10 crate.
808// Reasons:
809//   - Matches the original (documented but never implemented) intent of
810//     the keystore, so audit reports and SECURITY.md don't need to be
811//     re-anchored on a different primitive.
812//   - Well-audited, widely deployed, no platform gotchas.
813//   - `chacha20poly1305` would have been a defensible alternative
814//     (slightly better software performance), but the migration cost of
815//     changing the documented primitive while we already have to ship a
816//     migration for the broken construction is not worth it.
817//
818// On-disk v2 format (`encrypt_for_disk_v2`):
819//   [ magic = 0x54 ('T') ]   1 byte
820//   [ version = 0x02     ]   1 byte
821//   [ nonce              ]  12 bytes (random per encryption)
822//   [ ciphertext || tag  ]  N + 16 bytes (tag appended by aead crate)
823//
824// The first byte (0x54) is a structural sentinel so we can dispatch on
825// the format without relying on length heuristics. v1 ciphertexts start
826// with the first byte of their random nonce, so the chance of an
827// accidental v1 entry that looks like v2 is ~1/2^16 (matching both magic
828// AND version byte) and we still re-validate by AEAD-decrypting; if the
829// AEAD fails on something that looks like v2, we fall back to v1.
830
/// First byte of every v2 keystore blob. Together with
/// `KEYSTORE_VERSION_V2` it lets the decrypt path recognise the v2
/// framing without relying on length heuristics.
const KEYSTORE_MAGIC: u8 = 0x54; // 'T'
/// Second byte of every v2 keystore blob: the format version.
const KEYSTORE_VERSION_V2: u8 = 0x02;
833
834/// Build the v2 keystore AEAD AAD.
835///
836/// The AAD binds two things into the GCM tag beyond ciphertext+nonce:
837///
838/// 1. **Framing prefix** (`[KEYSTORE_MAGIC, KEYSTORE_VERSION_V2]`) so
839///    flipping the magic or version byte on disk surfaces as a MAC
840///    failure rather than dispatcher confusion (the M2 audit finding).
841/// 2. **Entry identity** (`entry_id` and `public_key`) so an attacker
842///    with write access to `~/.treeship/keys/` cannot copy entry A's
843///    `enc_priv_key` ciphertext into entry B's JSON envelope. Without
844///    this binding, the swap would decrypt cleanly (same machine key,
845///    same framing-only AAD) and the signer for advertised key id A
846///    would silently sign with key B's secret scalar — un-binding
847///    `KeyInfo.public_key` from the actual scalar in use. This closes
848///    the "intra-keystore swap" class flagged in the post-merge audit
849///    of TS-2026-001.
850///
851/// Every variable-length field is length-prefixed with a big-endian
852/// u32 before its bytes. Concatenating variable-length fields without
853/// length prefixes is a forgery class (an attacker who controls field
854/// boundaries can shift bytes between fields and present a different
855/// `(entry_id, public_key)` pair whose AAD-bytes serialize identically).
856/// `entry_id` is a fixed-prefix `key_<hex>` string in practice, but we
857/// length-prefix it anyway to defend against future id schemes.
858///
859/// The AAD must be byte-identical on encrypt and decrypt. Future
860/// versions (V3+) get their own builder; the dispatcher picks which
861/// to use based on the framing prefix.
862fn build_aad_v2(entry_id: &str, public_key: &[u8]) -> Vec<u8> {
863    let mut aad = Vec::with_capacity(2 + 4 + entry_id.len() + 4 + public_key.len());
864    aad.push(KEYSTORE_MAGIC);
865    aad.push(KEYSTORE_VERSION_V2);
866    aad.extend_from_slice(&(entry_id.len() as u32).to_be_bytes());
867    aad.extend_from_slice(entry_id.as_bytes());
868    aad.extend_from_slice(&(public_key.len() as u32).to_be_bytes());
869    aad.extend_from_slice(public_key);
870    aad
871}
872
873/// AES-256-GCM (the real one) encrypt for at-rest keystore storage.
874/// Returns the framed v2 blob ready to drop into `EncryptedEntry::enc_priv_key`.
875///
876/// Output: `[magic, version, nonce(12), ciphertext || tag(16)]`.
877///
878/// The AEAD's Associated Authenticated Data binds:
879/// - the framing prefix (M2 — flipping magic/version surfaces as MAC failure)
880/// - the entry id and public key (post-merge audit fix-up — closes the
881///   intra-keystore swap class where a local attacker copies entry A's
882///   `enc_priv_key` into entry B's JSON envelope).
883///
884/// See `build_aad_v2` for the exact layout. `entry_id` and `public_key`
885/// must match what gets serialized into the `EncryptedEntry` JSON;
886/// `decrypt_for_disk_v2` reads them back from the deserialized entry
887/// to recompute the AAD.
888fn encrypt_for_disk_v2(
889    key: &[u8; 32],
890    entry_id: &str,
891    public_key: &[u8],
892    plaintext: &[u8],
893) -> Result<Vec<u8>, String> {
894    // Wrap the in-memory AEAD key in Zeroizing so the local stack copy
895    // is wiped on drop. The aes-gcm cipher object owns its own internal
896    // expanded key schedule; that's outside our control, but the raw
897    // 32-byte buffer at this scope is ours to clear.
898    let key_buf: Zeroizing<[u8; 32]> = Zeroizing::new(*key);
899    let aead_key: &AesKey<Aes256Gcm> = AesKey::<Aes256Gcm>::from_slice(key_buf.as_slice());
900    let cipher = Aes256Gcm::new(aead_key);
901
902    // 96-bit random nonce from the OS CSPRNG.
903    let nonce = Aes256Gcm::generate_nonce(&mut AeadOsRng);
904
905    let aad = build_aad_v2(entry_id, public_key);
906    let ciphertext = cipher
907        .encrypt(
908            &nonce,
909            Payload {
910                msg: plaintext,
911                aad: aad.as_slice(),
912            },
913        )
914        .map_err(|e| format!("aead encrypt failed: {e}"))?;
915
916    let mut out = Vec::with_capacity(2 + 12 + ciphertext.len());
917    out.push(KEYSTORE_MAGIC);
918    out.push(KEYSTORE_VERSION_V2);
919    out.extend_from_slice(nonce.as_slice());
920    out.extend_from_slice(&ciphertext);
921    Ok(out)
922}
923
924/// AES-256-GCM decrypt of a v2 framed blob. Uses the same AAD binding
925/// as `encrypt_for_disk_v2`:
926///   - framing prefix (so a tampered magic/version surfaces as MAC failure)
927///   - entry id + public key (so swapping `enc_priv_key` between entries
928///     in the same keystore surfaces as MAC failure).
929///
930/// `entry_id` and `public_key` come from the `EncryptedEntry` JSON
931/// envelope that holds `blob`. The caller is responsible for passing the
932/// *envelope's* id and pubkey, not values from some other source — that
933/// is precisely what binds the ciphertext to its envelope.
934fn decrypt_v2(
935    key: &[u8; 32],
936    entry_id: &str,
937    public_key: &[u8],
938    blob: &[u8],
939) -> Result<Vec<u8>, String> {
940    // Minimum: magic(1) + version(1) + nonce(12) + tag(16) = 30 bytes.
941    if blob.len() < 30 {
942        return Err("v2 ciphertext too short".into());
943    }
944    if blob[0] != KEYSTORE_MAGIC || blob[1] != KEYSTORE_VERSION_V2 {
945        return Err("v2 ciphertext has wrong magic/version".into());
946    }
947    let nonce_bytes = &blob[2..14];
948    let ct = &blob[14..];
949
950    let key_buf: Zeroizing<[u8; 32]> = Zeroizing::new(*key);
951    let aead_key: &AesKey<Aes256Gcm> = AesKey::<Aes256Gcm>::from_slice(key_buf.as_slice());
952    let cipher = Aes256Gcm::new(aead_key);
953    let nonce = Nonce::from_slice(nonce_bytes);
954
955    let aad = build_aad_v2(entry_id, public_key);
956    cipher
957        .decrypt(
958            nonce,
959            Payload {
960                msg: ct,
961                aad: aad.as_slice(),
962            },
963        )
964        .map_err(|_| "MAC verification failed — key file may be corrupt or wrong machine".into())
965}
966
967/// Returns true iff `blob` is shaped like a v1 (legacy) ciphertext.
968/// Used by the dispatcher to decide whether a successful decrypt should
969/// trigger a transparent re-encrypt to v2.
970fn is_legacy_v1(blob: &[u8]) -> bool {
971    // A v2 blob always starts with [magic, version]. Anything else
972    // (including the empty enc_priv_key case during partial writes) is
973    // treated as legacy and routed through the v1 path, which will fail
974    // cleanly on garbage.
975    !(blob.len() >= 2 && blob[0] == KEYSTORE_MAGIC && blob[1] == KEYSTORE_VERSION_V2)
976}
977
978/// Top-level decrypt dispatcher used by the keystore. Tries v2 if the
979/// blob carries the magic+version prefix, otherwise falls through to the
980/// legacy v1 path. If a blob looks like v2 but AEAD verification fails,
981/// we also try v1 — this defends against the (negligible) probability
982/// that a legacy ciphertext's random first two bytes happen to collide
983/// with our magic+version.
984///
985/// M1 (TS-2026-001 audit): when the blob is v2-shaped and BOTH the v2
986/// AEAD and the v1 fallback fail, surface the v2 error rather than the
987/// v1 error. v1's failure on a v2-shaped blob is mechanical (wrong
988/// MAC computed under the wrong construction) and tells the user
989/// nothing useful; v2's failure is the actually-relevant signal
990/// (MAC verification under the documented AEAD). The previous code
991/// would mask the meaningful error with a confused legacy error
992/// message that pointed at the wrong remediation.
993fn decrypt_from_disk(
994    key: &[u8; 32],
995    entry_id: &str,
996    public_key: &[u8],
997    enc_data: &[u8],
998    legacy_nonce_field: &[u8],
999) -> Result<Zeroizing<Vec<u8>>, String> {
1000    if !is_legacy_v1(enc_data) {
1001        match decrypt_v2(key, entry_id, public_key, enc_data) {
1002            Ok(pt) => return Ok(Zeroizing::new(pt)),
1003            Err(v2_err) => {
1004                // Collision fallback. v1 entries had random first bytes;
1005                // there's a vanishing chance one looks like v2 framing.
1006                // Try v1 first; if it succeeds we have a legitimate
1007                // legacy entry whose framing happens to look v2-shaped.
1008                // If v1 also fails, surface the v2 error (the
1009                // semantically meaningful one) rather than v1's
1010                // mechanical-junk failure.
1011                return match decrypt_legacy_v1(key, enc_data, legacy_nonce_field) {
1012                    Ok(pt) => Ok(Zeroizing::new(pt)),
1013                    Err(_) => Err(v2_err),
1014                };
1015            }
1016        }
1017    }
1018    decrypt_legacy_v1(key, enc_data, legacy_nonce_field).map(Zeroizing::new)
1019}
1020
1021/// DEPRECATED: legacy at-rest decryption for keystores written before
1022/// v0.10.3. This is the SHA-256-CTR + HMAC-SHA-256 construction that
1023/// was mis-labelled as AES-256-GCM (TS-2026-001). The CTR keystream is
1024/// also degenerate (the same `enc_key` byte is reused once per
1025/// plaintext byte, since `block[i % 32]` indexes the same SHA-256 output
1026/// modulo 32), so the construction is NOT a real stream cipher even
1027/// ignoring the AEAD mislabelling.
1028///
1029/// Kept ONLY to migrate existing on-disk keystores forward to the v2
1030/// AEAD format. Never call this for new writes. The encrypt counterpart
1031/// has been removed from the v2 codepath — the only place v1
1032/// ciphertexts come from is files written by older Treeship versions.
1033pub fn aes_gcm_decrypt(
1034    key: &[u8; 32],
1035    enc_data: &[u8],
1036    _nonce_unused: &[u8],
1037) -> Result<Vec<u8>, String> {
1038    // Preserved as a public symbol because the `treeship-vi` sibling
1039    // crate calls it directly. vi only ever produces v1 ciphertexts
1040    // (its `aes_gcm_encrypt` shim calls `legacy_v1_encrypt`) and has
1041    // no concept of the `EncryptedEntry` envelope that carries the
1042    // entry id + public key the v2 AAD now requires. Route this shim
1043    // directly through the legacy v1 path so vi's call site keeps
1044    // working byte-for-byte; vi's eventual migration release will
1045    // adopt its own AEAD path with its own envelope binding.
1046    decrypt_legacy_v1(key, enc_data, _nonce_unused)
1047}
1048
1049/// DEPRECATED: legacy at-rest encryption. Same caveats as
1050/// `aes_gcm_decrypt`. Kept ONLY as a public symbol for compatibility
1051/// with the `treeship-vi` sibling crate; the core keystore no longer
1052/// produces v1 ciphertexts.
1053///
1054/// New code MUST use `encrypt_for_disk_v2`. This function still
1055/// produces v1-format output so the vi crate's on-disk format remains
1056/// byte-stable until it migrates on its own cadence.
1057pub fn aes_gcm_encrypt(key: &[u8; 32], plaintext: &[u8]) -> Result<(Vec<u8>, Vec<u8>), String> {
1058    legacy_v1_encrypt(key, plaintext)
1059}
1060
1061/// Legacy v1 encrypt. SHA-256-CTR + HMAC-SHA-256. DO NOT USE for new
1062/// writes — present only so vi-keystore callers keep working until
1063/// they migrate. See `aes_gcm_encrypt` doc-comment for the security
1064/// caveats.
1065fn legacy_v1_encrypt(key: &[u8; 32], plaintext: &[u8]) -> Result<(Vec<u8>, Vec<u8>), String> {
1066    use sha2::Sha256;
1067
1068    let mut nonce = [0u8; 12];
1069    // v0.10.4 P1 audit: nonce reuse breaks AEAD. Read directly from the OS
1070    // CSPRNG via OsRng rather than the userland thread_rng, which can mis-seed
1071    // across forks / on some WASM targets. Legacy v1 write path is kept for
1072    // treeship-vi byte-stability but still needs sound nonces.
1073    OsRng.fill_bytes(&mut nonce);
1074
1075    let mut enc_key_input = key.to_vec();
1076    enc_key_input.extend_from_slice(&nonce);
1077    enc_key_input.extend_from_slice(b"enc");
1078    let enc_key = Sha256::digest(&enc_key_input);
1079
1080    let mut mac_key_input = key.to_vec();
1081    mac_key_input.extend_from_slice(&nonce);
1082    mac_key_input.extend_from_slice(b"mac");
1083    let mac_key = Sha256::digest(&mac_key_input);
1084
1085    let ciphertext: Vec<u8> = plaintext.iter().enumerate().map(|(i, &b)| {
1086        let mut block_input = enc_key.to_vec();
1087        block_input.extend_from_slice(&(i as u64).to_le_bytes());
1088        let block = Sha256::digest(&block_input);
1089        b ^ block[i % 32]
1090    }).collect();
1091
1092    let mut mac_input = mac_key.to_vec();
1093    mac_input.extend_from_slice(&nonce);
1094    mac_input.extend_from_slice(&ciphertext);
1095    let mac = Sha256::digest(&mac_input);
1096
1097    let mut out = Vec::with_capacity(12 + 32 + ciphertext.len());
1098    out.extend_from_slice(&nonce);
1099    out.extend_from_slice(&mac);
1100    out.extend_from_slice(&ciphertext);
1101
1102    Ok((out, nonce.to_vec()))
1103}
1104
1105/// Legacy v1 decrypt. SHA-256-CTR + HMAC-SHA-256. See the module-level
1106/// notes on TS-2026-001 for why this is broken; kept only to migrate
1107/// existing keystores forward.
1108fn decrypt_legacy_v1(
1109    key: &[u8; 32],
1110    enc_data: &[u8],
1111    _nonce_unused: &[u8],
1112) -> Result<Vec<u8>, String> {
1113    if enc_data.len() < 44 {
1114        return Err("ciphertext too short".into());
1115    }
1116    use sha2::Sha256;
1117
1118    let nonce      = &enc_data[..12];
1119    let stored_mac = &enc_data[12..44];
1120    let ciphertext = &enc_data[44..];
1121
1122    let nonce_arr: [u8; 12] = nonce.try_into().unwrap();
1123
1124    let mut enc_key_input = key.to_vec();
1125    enc_key_input.extend_from_slice(&nonce_arr);
1126    enc_key_input.extend_from_slice(b"enc");
1127    let enc_key = Sha256::digest(&enc_key_input);
1128
1129    let mut mac_key_input = key.to_vec();
1130    mac_key_input.extend_from_slice(&nonce_arr);
1131    mac_key_input.extend_from_slice(b"mac");
1132    let mac_key = Sha256::digest(&mac_key_input);
1133
1134    let mut mac_input = mac_key.to_vec();
1135    mac_input.extend_from_slice(&nonce_arr);
1136    mac_input.extend_from_slice(ciphertext);
1137    let computed_mac = Sha256::digest(&mac_input);
1138
1139    let mac_ok = stored_mac.iter().zip(computed_mac.iter())
1140        .fold(0u8, |acc, (a, b)| acc | (a ^ b)) == 0;
1141
1142    if !mac_ok {
1143        return Err("MAC verification failed — key file may be corrupt or wrong machine".into());
1144    }
1145
1146    let plaintext: Vec<u8> = ciphertext.iter().enumerate().map(|(i, &b)| {
1147        let mut block_input = enc_key.to_vec();
1148        block_input.extend_from_slice(&(i as u64).to_le_bytes());
1149        let block = Sha256::digest(&block_input);
1150        b ^ block[i % 32]
1151    }).collect();
1152
1153    Ok(plaintext)
1154}
1155
1156// --- Machine key derivation ---
1157
1158pub fn derive_machine_key(store_dir: &Path) -> Result<[u8; 32], KeyError> {
1159    // 1. Linux: /etc/machine-id (stable across reboots)
1160    if let Ok(id) = fs::read_to_string("/etc/machine-id") {
1161        let trimmed = id.trim();
1162        if !trimmed.is_empty() {
1163            let mut h = Sha256::new();
1164            h.update(trimmed.as_bytes());
1165            h.update(store_dir.to_string_lossy().as_bytes());
1166            return Ok(h.finalize().into());
1167        }
1168    }
1169
1170    // 2. macOS: hostname + username derivation (v1, backward compatible).
1171    //
1172    // TODO(v0.7.0): Migrate to IOPlatformSerialNumber-based derivation.
1173    // The serial number is more stable (survives hostname and username
1174    // changes), but switching now would silently invalidate all existing
1175    // keys on macOS. A proper migration needs to:
1176    //   1. Try the new derivation first.
1177    //   2. On decryption failure, fall back to hostname+username.
1178    //   3. If legacy succeeds, re-encrypt with the new key and save.
1179    // Until that migration tooling is in place, keep hostname+username
1180    // as the primary derivation so existing users are not locked out.
1181    #[cfg(target_os = "macos")]
1182    {
1183        let hostname = std::process::Command::new("hostname")
1184            .output()
1185            .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
1186            .unwrap_or_default();
1187        let username = std::env::var("USER").unwrap_or_default();
1188        if !hostname.is_empty() && !username.is_empty() {
1189            let mut h = Sha256::new();
1190            h.update(b"treeship-machine-key:");
1191            h.update(hostname.as_bytes());
1192            h.update(b":");
1193            h.update(username.as_bytes());
1194            h.update(b":");
1195            h.update(store_dir.to_string_lossy().as_bytes());
1196            return Ok(h.finalize().into());
1197        }
1198    }
1199
1200    // 3. Fallback: random seed file. Co-located with the keystore so a
1201    //    project-local keystore (/proj/.treeship/keys/) keeps its seed at
1202    //    /proj/.treeship/machine_seed -- never reaching for ~/.treeship.
1203    //    A global keystore (~/.treeship/keys/) co-locates to
1204    //    ~/.treeship/machine_seed, which is byte-identical to the
1205    //    pre-v0.9.6 location, so existing global keystores keep working.
1206    //
1207    //    Backward-compat read order:
1208    //      1. <store_dir>/../machine_seed  (the new co-located path)
1209    //      2. ~/.treeship/machine_seed     (the old hardcoded path)
1210    //    Write order on first creation:
1211    //      1. <store_dir>/../machine_seed  if the parent exists/is writable
1212    //      2. ~/.treeship/machine_seed     as a last resort
1213    //
1214    //    This makes project-local config truly self-contained: an
1215    //    isolated /proj keystore can decrypt its own keys even when
1216    //    the user's ~/.treeship is corrupt or on a different machine,
1217    //    closing the trust-fabric isolation gap that blocked
1218    //    project-local smoke tests.
1219    let local_seed_path = store_dir.parent().map(|p| p.join("machine_seed"));
1220    let home = std::env::var("HOME")
1221        .map(std::path::PathBuf::from)
1222        .map_err(|_| KeyError::Crypto("HOME not set".to_string()))?;
1223    let global_seed_path = home.join(".treeship").join("machine_seed");
1224
1225    let seed = if let Some(local) = local_seed_path.as_ref().filter(|p| p.exists()) {
1226        fs::read_to_string(local).map_err(KeyError::Io)?
1227    } else if global_seed_path.exists() {
1228        // Backward-compat: an existing global seed keeps decrypting any
1229        // keystore that was encrypted under it (in particular the
1230        // standard ~/.treeship/keys/ case where local == global).
1231        fs::read_to_string(&global_seed_path).map_err(KeyError::Io)?
1232    } else {
1233        let mut bytes = [0u8; 32];
1234        // v0.10.4 P1 audit: this seed becomes the machine-key fallback used to
1235        // wrap on-disk private keys. Source straight from the OS entropy pool.
1236        OsRng.fill_bytes(&mut bytes);
1237        let seed_hex = hex_encode(&bytes);
1238
1239        // Prefer creating the seed locally. Falls back to the global
1240        // path only when the keystore has no usable parent (rare;
1241        // happens when store_dir is "/" or similar pathological input).
1242        let target = match local_seed_path.as_ref() {
1243            Some(p) => {
1244                let _ = fs::create_dir_all(p.parent().unwrap_or(Path::new(".")));
1245                p.clone()
1246            }
1247            None => {
1248                let _ = fs::create_dir_all(global_seed_path.parent().unwrap_or(Path::new(".")));
1249                global_seed_path.clone()
1250            }
1251        };
1252        fs::write(&target, &seed_hex).map_err(KeyError::Io)?;
1253        #[cfg(unix)]
1254        {
1255            use std::os::unix::fs::PermissionsExt;
1256            let _ = fs::set_permissions(&target, fs::Permissions::from_mode(0o600));
1257        }
1258        seed_hex
1259    };
1260
1261    let mut h = Sha256::new();
1262    h.update(b"treeship-machine-key-fallback:");
1263    h.update(seed.trim().as_bytes());
1264    h.update(b":");
1265    h.update(store_dir.to_string_lossy().as_bytes());
1266    Ok(h.finalize().into())
1267}
1268
1269/// Stable machine key derivation for NEW keys (VI P-256, etc).
1270/// Uses hardware identifiers that survive hostname/user changes.
1271/// For legacy ship Ed25519 keys, use `derive_machine_key()` instead.
1272pub fn derive_machine_key_stable(store_dir: &Path) -> Result<[u8; 32], KeyError> {
1273    // 1. Linux: /etc/machine-id
1274    if let Ok(id) = fs::read_to_string("/etc/machine-id") {
1275        let trimmed = id.trim();
1276        if !trimmed.is_empty() {
1277            let mut h = Sha256::new();
1278            h.update(b"treeship-machine-key-v2:");
1279            h.update(trimmed.as_bytes());
1280            h.update(b":");
1281            h.update(store_dir.to_string_lossy().as_bytes());
1282            return Ok(h.finalize().into());
1283        }
1284    }
1285
1286    // 2. macOS: IOPlatformSerialNumber (hardware serial, stable across
1287    //    hostname changes, user renames, non-interactive shells)
1288    #[cfg(target_os = "macos")]
1289    {
1290        if let Ok(output) = std::process::Command::new("ioreg")
1291            .args(["-rd1", "-c", "IOPlatformExpertDevice"])
1292            .output()
1293        {
1294            let stdout = String::from_utf8_lossy(&output.stdout);
1295            for line in stdout.lines() {
1296                if line.contains("IOPlatformSerialNumber") {
1297                    if let Some(serial) = line.split('"').nth(3) {
1298                        if !serial.is_empty() {
1299                            let mut h = Sha256::new();
1300                            h.update(b"treeship-machine-key-v2:");
1301                            h.update(serial.as_bytes());
1302                            h.update(b":");
1303                            h.update(store_dir.to_string_lossy().as_bytes());
1304                            return Ok(h.finalize().into());
1305                        }
1306                    }
1307                }
1308            }
1309        }
1310    }
1311
1312    // 3. Fallback: persistent random seed in ~/.treeship/.internal/
1313    //    Separate from key material. Mode 0600.
1314    let home = std::env::var("HOME")
1315        .map(std::path::PathBuf::from)
1316        .map_err(|_| KeyError::Crypto("HOME not set".to_string()))?;
1317    let seed_dir = home.join(".treeship").join(".internal");
1318    let _ = fs::create_dir_all(&seed_dir);
1319    #[cfg(unix)]
1320    {
1321        use std::os::unix::fs::PermissionsExt;
1322        let _ = fs::set_permissions(&seed_dir, fs::Permissions::from_mode(0o700));
1323    }
1324
1325    let seed_path = seed_dir.join("machine_seed_v2");
1326    let seed = if seed_path.exists() {
1327        fs::read_to_string(&seed_path).map_err(KeyError::Io)?
1328    } else {
1329        let mut bytes = [0u8; 32];
1330        // v0.10.4 P1 audit: machine_seed_v2 backs the v2 machine-key
1331        // fallback. Same OsRng rationale as the v1 seed above.
1332        OsRng.fill_bytes(&mut bytes);
1333        let seed_hex = hex_encode(&bytes);
1334        fs::write(&seed_path, &seed_hex).map_err(KeyError::Io)?;
1335        #[cfg(unix)]
1336        {
1337            use std::os::unix::fs::PermissionsExt;
1338            let _ = fs::set_permissions(&seed_path, fs::Permissions::from_mode(0o600));
1339        }
1340        seed_hex
1341    };
1342
1343    let mut h = Sha256::new();
1344    h.update(b"treeship-machine-key-v2-fallback:");
1345    h.update(seed.trim().as_bytes());
1346    h.update(b":");
1347    h.update(store_dir.to_string_lossy().as_bytes());
1348    Ok(h.finalize().into())
1349}
1350
1351// --- Utility ---
1352
1353fn new_key_id() -> KeyId {
1354    let mut b = [0u8; 8];
1355    // v0.10.4 P1 audit: key_id is mixed into AAD by encrypt_for_disk_v2, so
1356    // collisions or low-entropy ids would weaken the AAD binding. Use OsRng
1357    // directly so the id is OS-CSPRNG-quality even under fork or odd targets.
1358    OsRng.fill_bytes(&mut b);
1359    format!("key_{}", hex_encode(&b))
1360}
1361
1362fn fingerprint(pub_key: &[u8]) -> String {
1363    let h = Sha256::digest(pub_key);
1364    hex_encode(&h[..8])
1365}
1366
/// Encodes `b` as lowercase hexadecimal, two characters per byte.
///
/// The output buffer is allocated once at its final size and filled
/// from a digit table, so no temporary string is created per byte.
fn hex_encode(b: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(b.len() * 2);
    for &byte in b {
        // High nibble first, then low nibble.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
1373
1374/// Verify a private-key file has restrictive permissions before loading
1375/// it for signing. Returns `Ok(())` on non-Unix platforms, when the
1376/// `TREESHIP_ALLOW_INSECURE_KEY_PERMS=1` escape hatch is set, or when
1377/// the file is not group/world accessible. Otherwise returns
1378/// `KeyError::InsecureKeyPerms` with the offending path and mode.
1379///
1380/// **TOCTOU caveat:** this path-based check has an unavoidable race
1381/// window between the `stat` and any subsequent `open` of the same
1382/// path. New signing-path callers MUST use
1383/// `check_open_key_file_perms` (fstat on an already-open fd) instead;
1384/// this function is retained only for non-signing callers that
1385/// already accept the race (e.g. `treeship doctor` scanning the
1386/// keystore directory).
1387#[allow(dead_code)]
1388fn check_key_file_perms(path: &Path) -> Result<(), KeyError> {
1389    #[cfg(unix)]
1390    {
1391        use std::os::unix::fs::PermissionsExt;
1392        if std::env::var_os("TREESHIP_ALLOW_INSECURE_KEY_PERMS")
1393            .map(|v| v == "1")
1394            .unwrap_or(false)
1395        {
1396            return Ok(());
1397        }
1398        // Missing files are reported by the caller as NotFound -- don't
1399        // mask that with a perm error.
1400        let meta = match fs::metadata(path) {
1401            Ok(m) => m,
1402            Err(_) => return Ok(()),
1403        };
1404        let mode = meta.permissions().mode();
1405        if mode & 0o077 != 0 {
1406            return Err(KeyError::InsecureKeyPerms {
1407                path: path.to_path_buf(),
1408                mode,
1409            });
1410        }
1411    }
1412    let _ = path;
1413    Ok(())
1414}
1415
1416/// Race-free perm gate: runs `fstat` on an already-open `File` and
1417/// rejects if the mode has any group or world bits. Use this from the
1418/// signing path: open the key file once, hand the resulting `File` to
1419/// this function, then read from the SAME `File` -- the inode is
1420/// pinned by the open fd, so a path-level swap between perm-check and
1421/// read cannot influence what we end up decrypting.
1422///
1423/// `path` is carried only for error reporting; it is never re-opened.
1424/// The `TREESHIP_ALLOW_INSECURE_KEY_PERMS=1` bypass is honored
1425/// identically to `check_key_file_perms` so existing CI workflows keep
1426/// working.
1427#[allow(unused_variables)]
1428fn check_open_key_file_perms(path: &Path, file: &fs::File) -> Result<(), KeyError> {
1429    #[cfg(unix)]
1430    {
1431        use std::os::unix::fs::PermissionsExt;
1432        if std::env::var_os("TREESHIP_ALLOW_INSECURE_KEY_PERMS")
1433            .map(|v| v == "1")
1434            .unwrap_or(false)
1435        {
1436            return Ok(());
1437        }
1438        // `File::metadata` on Unix calls `fstat(fd)` -- it does NOT
1439        // re-resolve the path, so the result describes the same inode
1440        // we will read from. This is the structural property that
1441        // makes the gate race-free.
1442        let meta = file.metadata()?;
1443        let mode = meta.permissions().mode();
1444        if mode & 0o077 != 0 {
1445            return Err(KeyError::InsecureKeyPerms {
1446                path: path.to_path_buf(),
1447                mode,
1448            });
1449        }
1450    }
1451    Ok(())
1452}
1453
1454impl Store {
1455    /// Repair file permissions on the keystore directory and every file
1456    /// inside it: dir to 0700, key entry files and manifest to 0600.
1457    /// Used by `treeship doctor --fix`. No-op on non-Unix.
1458    ///
1459    /// Returns the list of (path, old_mode, new_mode) tuples for paths
1460    /// that were actually changed, so the caller can report what it did.
1461    pub fn fix_perms(&self) -> Result<Vec<(PathBuf, u32, u32)>, KeyError> {
1462        let mut changed: Vec<(PathBuf, u32, u32)> = Vec::new();
1463        #[cfg(unix)]
1464        {
1465            use std::os::unix::fs::PermissionsExt;
1466
1467            let dir_meta = fs::metadata(&self.dir)?;
1468            let dir_mode = dir_meta.permissions().mode() & 0o777;
1469            if dir_mode != 0o700 {
1470                fs::set_permissions(&self.dir, fs::Permissions::from_mode(0o700))?;
1471                changed.push((self.dir.clone(), dir_mode, 0o700));
1472            }
1473
1474            for entry in fs::read_dir(&self.dir)? {
1475                let entry = entry?;
1476                let path = entry.path();
1477                if !entry.file_type()?.is_file() {
1478                    continue;
1479                }
1480                let mode = entry.metadata()?.permissions().mode() & 0o777;
1481                if mode != 0o600 {
1482                    fs::set_permissions(&path, fs::Permissions::from_mode(0o600))?;
1483                    changed.push((path, mode, 0o600));
1484                }
1485            }
1486        }
1487        Ok(changed)
1488    }
1489}
1490
/// Open the per-entry migration sentinel lock file, creating it if it
/// does not exist yet. The file is opened read+write and is never
/// truncated. The returned handle can be handed to
/// `fs2::FileExt::lock_exclusive` to serialize concurrent v1->v2
/// migrations of the same entry across processes/threads
/// (TS-2026-001 H3).
///
/// On Unix the 0o600 mode is requested at creation time via
/// `OpenOptionsExt::mode`, so a newly created sentinel never exists with
/// looser permissions. On other platforms no mode is set and the file
/// inherits the parent's ACLs (the keystore dir is owner-scoped already).
fn open_migration_lock_file(path: &Path) -> Result<fs::File, io::Error> {
    let mut opts = fs::OpenOptions::new();
    opts.read(true).write(true).create(true).truncate(false);

    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        opts.mode(0o600);
    }

    opts.open(path)
}
1521
1522/// Atomically write `data` to `path` with owner-only (0o600) permissions on
1523/// Unix.
1524///
1525/// TS-2026-001 H1 + H2: the prior implementation was truncate-then-write,
1526/// which destroys the original file if the process crashes mid-write. For
1527/// the keystore that's catastrophic -- a crash during transparent v1->v2
1528/// migration would leave a zero-byte (or partial) key entry on disk and
1529/// the private key would be unrecoverable. This implementation writes to
1530/// a sibling tmp file in the same directory, fsyncs the bytes through to
1531/// the platter, then performs a POSIX-atomic same-filesystem `rename(2)`.
1532/// A crash before the rename leaves the original file intact; the tmp
1533/// file is harmless garbage that the next successful write will overwrite.
1534///
1535/// The 0o600 mode is set at file *creation* via `OpenOptionsExt::mode`
1536/// so there is no window in which the file exists with looser perms.
1537/// The prior `set_permissions` post-write call is dropped because it was
1538/// redundant and gave the appearance (but not the substance) of safety.
1539fn write_file_600(path: &Path, data: &[u8]) -> Result<(), KeyError> {
1540    // Place the tmp file in the same directory as the final path so the
1541    // rename stays on the same filesystem (cross-FS renames are not atomic
1542    // and degrade to copy+unlink, defeating the whole point).
1543    let tmp_path = path.with_extension("tmp");
1544
1545    // Best-effort cleanup of any stale tmp from a prior crash before we
1546    // start writing. Ignored on error -- if it doesn't exist that's fine,
1547    // and if it can't be removed the OpenOptions call below will surface
1548    // the underlying error.
1549    let _ = fs::remove_file(&tmp_path);
1550
1551    let write_result: Result<(), KeyError> = (|| {
1552        #[cfg(unix)]
1553        let open = {
1554            use std::os::unix::fs::OpenOptionsExt;
1555            fs::OpenOptions::new()
1556                .write(true)
1557                .create(true)
1558                .truncate(true)
1559                .mode(0o600)
1560                .open(&tmp_path)
1561        };
1562        #[cfg(not(unix))]
1563        let open = fs::OpenOptions::new()
1564            .write(true)
1565            .create(true)
1566            .truncate(true)
1567            .open(&tmp_path);
1568
1569        let mut f = open?;
1570        f.write_all(data)?;
1571        // sync_all flushes both data AND metadata, so on a crash after
1572        // the rename, fsck/journal recovery sees the new bytes -- not a
1573        // ghost inode with stale content.
1574        f.sync_all()?;
1575        Ok(())
1576    })();
1577
1578    if let Err(e) = write_result {
1579        // Best-effort cleanup so the next write isn't surprised by a
1580        // half-written tmp. Errors here are not surfaced: the original
1581        // write error is what the caller needs to see.
1582        let _ = fs::remove_file(&tmp_path);
1583        return Err(e);
1584    }
1585
1586    // Atomic same-filesystem rename. On Unix this is a single
1587    // rename(2) syscall guaranteed by POSIX to be atomic with respect
1588    // to other observers. On Windows std::fs::rename is implemented
1589    // via MoveFileEx with MOVEFILE_REPLACE_EXISTING (atomic on NTFS,
1590    // best-effort elsewhere). After this returns Ok, the new bytes are
1591    // visible at `path` and the tmp file no longer exists.
1592    if let Err(e) = fs::rename(&tmp_path, path) {
1593        let _ = fs::remove_file(&tmp_path);
1594        return Err(KeyError::Io(e));
1595    }
1596
1597    // fsync the parent directory so the rename's directory-entry update
1598    // is itself persisted. The previous code only fsynced the tmp
1599    // file's contents (via sync_all on the file handle) -- on ext4/xfs
1600    // with default mount options, the rename can return to userspace
1601    // before the dirent metadata has been written to the journal. A
1602    // power loss in that window leaves the directory entry pointing at
1603    // the OLD inode (or, worse, missing entirely if both old and new
1604    // were unlinked from the parent), even though both the data bytes
1605    // and the rename syscall ostensibly completed. The H1 doc-comment
1606    // above promised stronger durability than the code delivered;
1607    // fsyncing the parent dir closes that gap.
1608    //
1609    // Best-effort on Unix: a directory open + sync_all is the standard
1610    // pattern (see e.g. SQLite's atomic-commit, leveldb, lmdb). On
1611    // platforms where opening a directory for sync isn't supported, we
1612    // silently skip -- the rename is still atomic-with-respect-to-
1613    // observers, we just don't guarantee crash-durability of the
1614    // dirent update.
1615    #[cfg(unix)]
1616    {
1617        if let Some(parent) = path.parent() {
1618            // Errors here are non-fatal: the rename succeeded and the
1619            // common case (no power loss before the next fs flush) is
1620            // correct. We surface a failure to open/sync the dir only
1621            // if the rename itself succeeded, since otherwise the
1622            // caller would mistake a durability hint for a write
1623            // failure. swallow silently rather than return.
1624            if let Ok(dir) = fs::File::open(parent) {
1625                let _ = dir.sync_all();
1626            }
1627        }
1628    }
1629
1630    Ok(())
1631}
1632
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, this returns 0
/// instead of failing.
fn unix_now() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
1640
1641#[cfg(test)]
1642mod tests {
1643    use super::*;
1644
1645    fn temp_dir_path() -> PathBuf {
1646        let mut p = std::env::temp_dir();
1647        p.push(format!("treeship-test-{}", {
1648            let mut b = [0u8; 4];
1649            // v0.10.4 P1 audit: thread_rng acceptable here. This is a
1650            // test-only temp-dir suffix to avoid collisions between parallel
1651            // test runs. Not a cryptographic input; entropy quality irrelevant.
1652            rand::thread_rng().fill_bytes(&mut b);
1653            hex_encode(&b)
1654        }));
1655        p
1656    }
1657
1658    fn make_store() -> (Store, PathBuf) {
1659        let dir = temp_dir_path();
1660        let store = Store::open(&dir).unwrap();
1661        (store, dir)
1662    }
1663
1664    fn cleanup(dir: PathBuf) {
1665        let _ = fs::remove_dir_all(dir);
1666    }
1667
1668    #[test]
1669    fn generate_key() {
1670        let (store, dir) = make_store();
1671        let info = store.generate(true).unwrap();
1672        assert!(info.id.starts_with("key_"));
1673        assert_eq!(info.algorithm, "ed25519");
1674        assert!(!info.fingerprint.is_empty());
1675        assert_eq!(info.public_key.len(), 32);
1676        cleanup(dir);
1677    }
1678
1679    #[test]
1680    fn default_signer_works() {
1681        let (store, dir) = make_store();
1682        store.generate(true).unwrap();
1683        let signer = store.default_signer().unwrap();
1684        assert!(!signer.key_id().is_empty());
1685        let pae = crate::attestation::pae("text/plain", b"test");
1686        let sig = signer.sign(&pae).unwrap();
1687        assert_eq!(sig.len(), 64);
1688        cleanup(dir);
1689    }
1690
1691    #[test]
1692    fn encrypt_decrypt_roundtrip() {
1693        // Routes the legacy public API through the dispatcher; v1
1694        // ciphertexts must still decrypt correctly.
1695        let key = [42u8; 32];
1696        let plaintext = b"super secret private key material here!";
1697        let (enc, nonce) = aes_gcm_encrypt(&key, plaintext).unwrap();
1698        let dec = aes_gcm_decrypt(&key, &enc, &nonce).unwrap();
1699        assert_eq!(dec, plaintext);
1700    }
1701
1702    #[test]
1703    fn decrypt_wrong_key_fails() {
1704        let key   = [42u8; 32];
1705        let wrong = [99u8; 32];
1706        let (enc, nonce) = aes_gcm_encrypt(&key, b"secret").unwrap();
1707        assert!(aes_gcm_decrypt(&wrong, &enc, &nonce).is_err());
1708    }
1709
1710    // --- v2 AEAD tests (TS-2026-001 fix) -----------------------------------
1711
    // Fixed entry id + pubkey shared by the unit-level v2 tests below. The
    // AAD builder binds both values into the GCM tag, so the encrypt and
    // decrypt calls in a test must be given identical values. Using
    // constants keeps each test focused on its own bit-flip / tamper
    // assertion without dragging Store setup into the picture.
    const TEST_ENTRY_ID: &str = "key_unit_test_entry_0001";
    // 32 bytes, matching the raw Ed25519 public-key length.
    const TEST_PUBLIC_KEY: &[u8; 32] = &[0xAA; 32];
1719
1720    #[test]
1721    fn v2_encrypt_decrypt_roundtrip() {
1722        let key = [7u8; 32];
1723        let plaintext = b"super secret private key material here!";
1724        let blob =
1725            encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, plaintext).unwrap();
1726        // Structural check on the framing.
1727        assert_eq!(blob[0], KEYSTORE_MAGIC, "magic byte");
1728        assert_eq!(blob[1], KEYSTORE_VERSION_V2, "version byte");
1729        assert_eq!(blob.len(), 2 + 12 + plaintext.len() + 16,
1730                   "magic+version+nonce+ct+tag length");
1731
1732        let dec =
1733            decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]).unwrap();
1734        assert_eq!(&*dec, plaintext);
1735    }
1736
1737    #[test]
1738    fn v2_decrypt_wrong_key_fails() {
1739        let key   = [7u8; 32];
1740        let wrong = [99u8; 32];
1741        let blob = encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"secret").unwrap();
1742        // Wrong key with v2 framing: AEAD must reject. Dispatcher will
1743        // try v1 fallback (which also fails on garbage), so the final
1744        // error surfaces as a MAC failure rather than wrong plaintext.
1745        let result = decrypt_from_disk(&wrong, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]);
1746        assert!(result.is_err(), "wrong key must fail");
1747    }
1748
1749    #[test]
1750    fn v2_tamper_ciphertext_fails() {
1751        let key = [7u8; 32];
1752        let mut blob = encrypt_for_disk_v2(
1753            &key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"super secret private key"
1754        ).unwrap();
1755        // Flip one bit inside the ciphertext body (after the 14-byte
1756        // framing). GCM authenticates ciphertext + nonce; any flip must
1757        // fail.
1758        let last = blob.len() - 5;
1759        blob[last] ^= 0x01;
1760        let result = decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]);
1761        assert!(result.is_err(), "tampered ciphertext must fail to decrypt");
1762    }
1763
1764    #[test]
1765    fn v2_tamper_nonce_fails() {
1766        let key = [7u8; 32];
1767        let mut blob = encrypt_for_disk_v2(
1768            &key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"super secret private key"
1769        ).unwrap();
1770        // Flip a bit in the nonce (bytes [2..14]).
1771        blob[5] ^= 0x01;
1772        let result = decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]);
1773        assert!(result.is_err(), "tampered nonce must fail to decrypt");
1774    }
1775
1776    #[test]
1777    fn v2_tamper_tag_fails() {
1778        let key = [7u8; 32];
1779        let mut blob = encrypt_for_disk_v2(
1780            &key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"super secret private key"
1781        ).unwrap();
1782        // Flip a bit in the trailing GCM tag (last 16 bytes).
1783        let len = blob.len();
1784        blob[len - 1] ^= 0x80;
1785        let result = decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]);
1786        assert!(result.is_err(), "tampered GCM tag must fail to decrypt");
1787    }
1788
1789    #[test]
1790    fn v2_nonces_are_unique_across_writes() {
1791        // Sanity check: two encryptions of identical plaintext under the
1792        // same key must produce different blobs (random per-write nonce).
1793        // Without this property, AES-GCM is catastrophically broken.
1794        let key = [7u8; 32];
1795        let blob_a =
1796            encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"identical").unwrap();
1797        let blob_b =
1798            encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"identical").unwrap();
1799        assert_ne!(blob_a, blob_b,
1800                   "two v2 encryptions of the same plaintext must differ");
1801        assert_ne!(&blob_a[2..14], &blob_b[2..14], "nonces must differ");
1802
1803        // L1 (TS-2026-001 audit): draw 10k nonces in a row and assert
1804        // every one is distinct. A duplicate at this volume would be a
1805        // strong (10k^2 / 2^96 ~ 2^-65 floor) signal that the OS CSPRNG
1806        // backing aead::OsRng is misbehaving on this build. Cheap, fast,
1807        // and catches a regression class (PRNG mis-seeding,
1808        // accidentally-deterministic nonce, RNG getting forked across
1809        // threads without re-seed) that the 2-sample check above can't.
1810        const N: usize = 10_000;
1811        let mut nonces: std::collections::HashSet<Vec<u8>> =
1812            std::collections::HashSet::with_capacity(N);
1813        for _ in 0..N {
1814            let blob =
1815                encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"x").unwrap();
1816            // bytes [2..14] are the 12-byte GCM nonce.
1817            nonces.insert(blob[2..14].to_vec());
1818        }
1819        assert_eq!(
1820            nonces.len(),
1821            N,
1822            "all {} v2 nonces must be unique; collision => RNG defect",
1823            N
1824        );
1825    }
1826
1827    #[test]
1828    fn v2_tamper_version_byte_fails() {
1829        // M2: flipping the version byte must cause decryption to fail.
1830        // The framing sanity check catches obvious flips immediately;
1831        // the AAD-binding test below covers the case where the framing
1832        // sanity check would otherwise pass.
1833        let key = [7u8; 32];
1834        let mut blob = encrypt_for_disk_v2(
1835            &key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"super secret private key"
1836        ).unwrap();
1837        assert_eq!(blob[1], KEYSTORE_VERSION_V2);
1838        blob[1] = 0xff;
1839        assert!(
1840            decrypt_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob).is_err(),
1841            "altered version byte must be rejected"
1842        );
1843    }
1844
1845    #[test]
1846    fn v2_aad_binding_detects_framing_substitution() {
1847        // M2 direct check: encrypt a payload with v2 AAD, then construct
1848        // a blob whose framing claims to be v2 but whose ciphertext was
1849        // computed under a different AAD (empty). decrypt_v2 must
1850        // reject with MAC failure rather than returning the plaintext.
1851        let key = [7u8; 32];
1852        let plaintext = b"M2 AAD bound material";
1853
1854        // Compute a v2-framed blob without supplying AAD -- mimics what
1855        // the *pre-M2* code would have produced. This is the exact
1856        // attack surface AAD closes: an old blob whose framing is v2
1857        // but whose tag was computed empty.
1858        use aes_gcm::aead::Aead;
1859        let key_buf: Zeroizing<[u8; 32]> = Zeroizing::new(key);
1860        let aead_key: &AesKey<Aes256Gcm> = AesKey::<Aes256Gcm>::from_slice(key_buf.as_slice());
1861        let cipher = Aes256Gcm::new(aead_key);
1862        let nonce = Aes256Gcm::generate_nonce(&mut AeadOsRng);
1863        let ct_no_aad = cipher.encrypt(&nonce, plaintext.as_slice()).unwrap();
1864
1865        let mut forged = Vec::with_capacity(2 + 12 + ct_no_aad.len());
1866        forged.push(KEYSTORE_MAGIC);
1867        forged.push(KEYSTORE_VERSION_V2);
1868        forged.extend_from_slice(nonce.as_slice());
1869        forged.extend_from_slice(&ct_no_aad);
1870
1871        // Framing sanity passes. AAD does not. decrypt_v2 must reject.
1872        assert_eq!(forged[0], KEYSTORE_MAGIC);
1873        assert_eq!(forged[1], KEYSTORE_VERSION_V2);
1874        let result = decrypt_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &forged);
1875        assert!(result.is_err(),
1876                "ciphertext computed without AAD must fail to decrypt now that AAD is bound");
1877    }
1878
1879    #[test]
1880    fn dispatcher_surfaces_v2_error_on_corrupted_v2_blob() {
1881        // M1: a v2-shaped blob whose AEAD verification fails (and
1882        // whose v1 fallback also fails, since the bytes are garbage
1883        // under both constructions) must surface the v2 MAC error, not
1884        // the v1 "ciphertext too short" / random-junk error. The user
1885        // sees a meaningful message that points at the right
1886        // remediation.
1887        let key = [7u8; 32];
1888        let mut blob =
1889            encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"hello").unwrap();
1890        // Flip a byte in the GCM tag (last 16 bytes) so the v2 AEAD
1891        // rejects but the framing still classifies as v2.
1892        let last = blob.len() - 1;
1893        blob[last] ^= 0x01;
1894
1895        let err =
1896            decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]).unwrap_err();
1897        // The dispatcher should bubble the v2 error string up. v2's
1898        // error message contains "MAC verification failed"; v1's
1899        // shape on garbage data is either "ciphertext too short" or
1900        // a different MAC error. Match on the v2-specific tail.
1901        assert!(
1902            err.contains("MAC verification failed"),
1903            "dispatcher must surface the v2 MAC error on corrupted v2 blob, got: {err}"
1904        );
1905    }
1906
1907    #[test]
1908    fn legacy_v1_ciphertext_still_decrypts_via_dispatcher() {
1909        // Simulates an on-disk keystore written by Treeship <= v0.10.2:
1910        // the dispatcher must successfully route legacy ciphertexts
1911        // through the v1 path so existing users are not locked out.
1912        let key = [13u8; 32];
1913        let plaintext = b"pre-v0.10.3 keystore entry";
1914        let (legacy_blob, legacy_nonce) =
1915            legacy_v1_encrypt(&key, plaintext).unwrap();
1916
1917        // Sanity: legacy blob does NOT start with v2 framing.
1918        assert!(is_legacy_v1(&legacy_blob),
1919                "legacy_v1_encrypt output must classify as legacy");
1920
1921        // Dispatcher must accept it. AAD inputs are irrelevant for the
1922        // v1 path (it doesn't use them), but the signature requires them
1923        // — pass the same placeholder constants used elsewhere.
1924        let dec = decrypt_from_disk(
1925            &key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &legacy_blob, &legacy_nonce,
1926        )
1927        .unwrap();
1928        assert_eq!(&*dec, plaintext);
1929    }
1930
1931    #[test]
1932    fn store_signer_migrates_legacy_entry_to_v2() {
1933        // End-to-end: write a key entry with the legacy v1 ciphertext
1934        // (as if upgrading from v0.10.2), call `signer()`, then verify
1935        // the on-disk entry has been rewritten in v2 format.
1936        let (store, dir) = make_store();
1937
1938        // Generate normally (this writes v2). Then re-encrypt the
1939        // secret in v1 format and overwrite the entry on disk to
1940        // simulate the upgrade scenario.
1941        let info = store.generate(true).unwrap();
1942        let entry_path = store.entry_path(&info.id);
1943
1944        // Pull the v2 entry off disk, decrypt to recover the secret,
1945        // then re-encode in legacy v1 format and write it back.
1946        let v2_entry: EncryptedEntry =
1947            serde_json::from_slice(&fs::read(&entry_path).unwrap()).unwrap();
1948        let secret = decrypt_from_disk(
1949            &store.machine_key,
1950            &v2_entry.id,
1951            &v2_entry.public_key,
1952            &v2_entry.enc_priv_key,
1953            &v2_entry.nonce,
1954        )
1955            .unwrap();
1956        let (legacy_blob, legacy_nonce) =
1957            legacy_v1_encrypt(&store.machine_key, &secret).unwrap();
1958        let legacy_entry = EncryptedEntry {
1959            id:               v2_entry.id.clone(),
1960            algorithm:        v2_entry.algorithm.clone(),
1961            created_at:       v2_entry.created_at.clone(),
1962            public_key:       v2_entry.public_key.clone(),
1963            enc_priv_key:     legacy_blob,
1964            nonce:            legacy_nonce,
1965            valid_until:      v2_entry.valid_until.clone(),
1966            successor_key_id: v2_entry.successor_key_id.clone(),
1967        };
1968        fs::write(&entry_path, serde_json::to_vec_pretty(&legacy_entry).unwrap()).unwrap();
1969
1970        // Reload with a fresh Store so the cache doesn't paper over the
1971        // on-disk change.
1972        let store2 = Store::open(&dir).unwrap();
1973        // Loading the signer must succeed (legacy path works) AND
1974        // trigger the transparent migration to v2.
1975        let _signer = store2.signer(&info.id).unwrap();
1976
1977        let after: EncryptedEntry =
1978            serde_json::from_slice(&fs::read(&entry_path).unwrap()).unwrap();
1979        assert!(!is_legacy_v1(&after.enc_priv_key),
1980                "post-migration entry must be in v2 format");
1981        assert_eq!(after.enc_priv_key[0], KEYSTORE_MAGIC);
1982        assert_eq!(after.enc_priv_key[1], KEYSTORE_VERSION_V2);
1983        assert!(after.nonce.is_empty(),
1984                "v2 entries serialize an empty legacy nonce field");
1985
1986        // L2 (TS-2026-001 audit): the framing check above proves the
1987        // migrator *wrote* a v2-shaped blob, but a downstream
1988        // assert_eq! on framing alone doesn't prove the v2 ciphertext
1989        // is actually a working AEAD encryption of the right secret.
1990        // Load the signer one more time through a fresh Store; this
1991        // routes through the dispatcher's v2-first branch and would
1992        // fail loudly if the migration had produced garbage.
1993        let store3 = Store::open(&dir).unwrap();
1994        let _signer = store3
1995            .signer(&info.id)
1996            .expect("post-migration v2 decrypt works");
1997
1998        cleanup(dir);
1999    }
2000
2001    #[test]
2002    fn persist_and_reload() {
2003        let (store, dir) = make_store();
2004        let info = store.generate(true).unwrap();
2005
2006        // Open a new Store instance pointing to the same directory.
2007        let store2 = Store::open(&dir).unwrap();
2008        let signer = store2.signer(&info.id).unwrap();
2009        assert_eq!(signer.key_id(), info.id);
2010
2011        // The reloaded signer must produce signatures verifiable with
2012        // the same public key.
2013        let verifier = {
2014            use crate::attestation::Verifier;
2015            use ed25519_dalek::VerifyingKey;
2016            let pk_bytes: [u8; 32] = info.public_key.try_into().unwrap();
2017            let vk = VerifyingKey::from_bytes(&pk_bytes).unwrap();
2018            let mut v = Verifier::new(std::collections::HashMap::new());
2019            v.add_key(info.id.clone(), vk);
2020            v
2021        };
2022
2023        use crate::attestation::sign;
2024        use crate::statements::ActionStatement;
2025        let stmt   = ActionStatement::new("agent://test", "tool.call");
2026        let pt     = crate::statements::payload_type("action");
2027        let signed = sign(&pt, &stmt, signer.as_ref()).unwrap();
2028        verifier.verify(&signed.envelope).unwrap();
2029
2030        cleanup(dir);
2031    }
2032
2033    #[test]
2034    fn list_keys() {
2035        let (store, dir) = make_store();
2036        store.generate(true).unwrap();
2037        store.generate(false).unwrap();
2038
2039        let keys = store.list().unwrap();
2040        assert_eq!(keys.len(), 2);
2041        assert_eq!(keys.iter().filter(|k| k.is_default).count(), 1);
2042        cleanup(dir);
2043    }
2044
2045    #[test]
2046    fn no_default_key_errors() {
2047        let (store, dir) = make_store();
2048        assert!(store.default_signer().is_err());
2049        cleanup(dir);
2050    }
2051
/// A rotation with `set_default = true` must stamp the old key with
/// `valid_until` and a forward link, mint a distinct successor with no
/// lifecycle metadata of its own, and move the default flag over to it.
#[test]
fn rotate_mints_successor_and_links_predecessor() {
    let (keystore, tmp_dir) = make_store();
    let original = keystore.generate(true).unwrap();
    assert!(original.valid_until.is_none(), "fresh key has no expiry");
    assert!(original.successor_key_id.is_none(), "fresh key has no successor");

    let grace = std::time::Duration::from_secs(3600);
    let rotation = keystore.rotate(None, grace, true).unwrap();
    let (old, new) = (&rotation.predecessor, &rotation.successor);

    // The rotated key keeps its id but gains lifecycle metadata.
    assert_eq!(old.id, original.id);
    assert!(
        old.valid_until.is_some(),
        "predecessor must get valid_until after rotation"
    );
    assert_eq!(
        old.successor_key_id.as_deref(),
        Some(new.id.as_str()),
        "predecessor must link forward to successor"
    );
    assert!(
        !old.is_default,
        "after rotation with set_default=true, predecessor is no longer default"
    );

    // The new key is distinct and carries no lifecycle metadata yet.
    assert_ne!(new.id, original.id);
    assert!(new.valid_until.is_none(), "successor has no expiry yet");
    assert!(new.successor_key_id.is_none(), "successor is chain head");
    assert!(new.is_default, "successor is the new default");

    // list() must report the same stamped predecessor.
    let all_keys = keystore.list().unwrap();
    assert_eq!(all_keys.len(), 2);
    let old_listed = all_keys
        .iter()
        .find(|candidate| candidate.id == original.id)
        .unwrap();
    assert!(old_listed.valid_until.is_some());
    assert_eq!(
        old_listed.successor_key_id.as_deref(),
        Some(new.id.as_str())
    );

    cleanup(tmp_dir);
}
2089
/// Rotating with `set_default = false` mints a successor but leaves the
/// default flag on the predecessor.
#[test]
fn rotate_with_set_default_false_keeps_predecessor_active() {
    let (keystore, tmp_dir) = make_store();
    let original = keystore.generate(true).unwrap();

    let grace = std::time::Duration::from_secs(3600);
    let rotation = keystore.rotate(None, grace, false).unwrap();

    // The old key stays default; the new key exists but is not default.
    assert!(rotation.predecessor.is_default);
    assert!(!rotation.successor.is_default);
    let current_default = keystore.default_key_id().unwrap();
    assert_eq!(current_default, original.id);

    cleanup(tmp_dir);
}
2106
/// After a rotation the predecessor's key material must still load and
/// sign. Verifiers may reject on lifecycle grounds, but the keystore
/// itself must not destroy the material ahead of time.
#[test]
fn rotate_predecessor_signing_still_works_during_grace_window() {
    let (keystore, tmp_dir) = make_store();
    let original = keystore.generate(true).unwrap();
    let grace = std::time::Duration::from_secs(3600);
    let _ = keystore.rotate(None, grace, true).unwrap();

    // Load the rotated key explicitly by id and sign a small payload.
    let old_signer = keystore.signer(&original.id).unwrap();
    let message = crate::attestation::pae("text/plain", b"grace-window-payload");
    let signature = old_signer.sign(&message).unwrap();
    assert_eq!(signature.len(), 64);

    cleanup(tmp_dir);
}
2125
/// A key that already points at a successor cannot be rotated again;
/// the caller is expected to rotate the newest key in the chain instead.
#[test]
fn rotate_refuses_to_rotate_already_rotated_key() {
    let (keystore, tmp_dir) = make_store();
    keystore.generate(true).unwrap();
    let grace = std::time::Duration::from_secs(60);
    let first = keystore.rotate(None, grace, true).unwrap();

    // Second attempt targets the key that was just rotated, by id.
    let already_rotated = &first.predecessor.id;
    let refusal = keystore
        .rotate(Some(already_rotated), grace, true)
        .unwrap_err();

    let msg = match refusal {
        KeyError::Crypto(msg) => msg,
        other => panic!("expected Crypto error, got {other:?}"),
    };
    assert!(
        msg.contains("already been rotated"),
        "error must explain why: {msg}"
    );
    cleanup(tmp_dir);
}
2150
/// `successor_chain` must return the starting key followed by every
/// successor in rotation order, wherever in the chain the walk begins.
///
/// NOTE(review): the assertion message below calls the oldest key the
/// "head", while other text in this file uses "chain head" for the
/// newest key -- confirm which terminology is intended.
#[test]
fn successor_chain_walks_forward() {
    let (keystore, tmp_dir) = make_store();
    let grace = std::time::Duration::from_secs(60);
    let k0 = keystore.generate(true).unwrap();
    let r1 = keystore.rotate(None, grace, true).unwrap();
    let r2 = keystore.rotate(None, grace, true).unwrap();

    let first_id = k0.id.clone();
    let second_id = r1.successor.id.clone();
    let third_id = r2.successor.id.clone();

    // Walk from the original key: all three ids, oldest first.
    let full = keystore.successor_chain(&k0.id).unwrap();
    assert_eq!(
        full,
        vec![first_id, second_id.clone(), third_id.clone()],
        "chain must be ordered head -> tail"
    );

    // Walk from the middle key: the original key is not included.
    let from_middle = keystore.successor_chain(&r1.successor.id).unwrap();
    assert_eq!(from_middle, vec![second_id, third_id.clone()]);

    // Walk from the newest key: only that key.
    let from_last = keystore.successor_chain(&r2.successor.id).unwrap();
    assert_eq!(from_last, vec![third_id]);

    cleanup(tmp_dir);
}
2176
/// `valid_keys_at` must include a rotated key while its grace window is
/// open and drop it once the queried time is past the window.
#[test]
fn valid_keys_at_filters_by_grace_window() {
    let (keystore, tmp_dir) = make_store();
    let _ = keystore.generate(true).unwrap();
    let grace = std::time::Duration::from_secs(3600);
    let rotation = keystore.rotate(None, grace, true).unwrap();

    // Right after rotation: predecessor is mid-grace, successor is new.
    let at_rotation = unix_now();
    let valid_at_rotation = keystore.valid_keys_at(at_rotation).unwrap();
    assert_eq!(valid_at_rotation.len(), 2, "both predecessor (in grace) and successor should be valid");

    // 7200s ahead is beyond the 3600s grace window.
    let past_grace = unix_now() + 7200;
    let valid_past_grace = keystore.valid_keys_at(past_grace).unwrap();
    assert_eq!(
        valid_past_grace.len(),
        1,
        "after grace window only successor remains valid"
    );
    assert_eq!(valid_past_grace[0].id, rotation.successor.id);

    cleanup(tmp_dir);
}
2200
/// Regression: even if the manifest write FAILED (say, disk full at
/// the worst possible moment), the in-memory cache must reflect the
/// stamped predecessor that already landed on disk. Otherwise a
/// same-process retry would skip the already-rotated guard and mint a
/// duplicate successor.
///
/// A manifest-write failure is hard to inject mid-test, so this test
/// pins the precondition that makes the recovery work: after a
/// successful rotate(), the cache holds the stamped predecessor, so any
/// later rotate of that key is refused. Combined with the write order
/// (cache update BEFORE manifest write in rotate()), that means a
/// manifest-write crash leaves the cache aligned with disk, not behind
/// it.
///
/// The related case of a successor file that is missing on disk is
/// covered by
/// `rotated_predecessor_pointing_at_missing_successor_surfaces_clear_error`.
#[test]
fn rotate_cache_reflects_stamped_predecessor_for_retry_safety() {
    let (keystore, tmp_dir) = make_store();
    let original = keystore.generate(true).unwrap();
    let grace = std::time::Duration::from_secs(60);
    let _ = keystore.rotate(None, grace, true).unwrap();

    // Retry against the same Store instance. A stale cache (still
    // showing the unstamped predecessor) would let this call through
    // and mint a duplicate successor, so it MUST be refused.
    let refusal = keystore
        .rotate(Some(&original.id), grace, true)
        .unwrap_err();

    let msg = match refusal {
        KeyError::Crypto(msg) => msg,
        other => panic!("expected Crypto error, got {other:?}"),
    };
    assert!(
        msg.contains("already been rotated"),
        "cache should reflect stamped predecessor; got: {msg}"
    );

    cleanup(tmp_dir);
}
2249
/// Regression: a predecessor whose successor key file is missing on
/// disk must produce a clear error, not wedge the keystore. With the
/// successor-first write order a single-process crash cannot reach this
/// state, but an operator can still delete a successor file by hand.
/// The recovery path is to clear the predecessor's successor pointer
/// (or restore the file from backup) and try again.
#[test]
fn rotated_predecessor_pointing_at_missing_successor_surfaces_clear_error() {
    let (keystore, tmp_dir) = make_store();
    keystore.generate(true).unwrap();
    let grace = std::time::Duration::from_secs(60);
    let rotation = keystore.rotate(None, grace, true).unwrap();

    // Delete the successor's entry file, as an operator might. The
    // predecessor still references it.
    let missing_file = keystore.entry_path(&rotation.successor.id);
    fs::remove_file(&missing_file).unwrap();

    // A second Store instance starts with a cold cache, so the chain
    // walk has to go to disk. The missing file must come back as
    // KeyError::NotFound -- no panic, no infinite loop.
    let cold_store = Store::open(&tmp_dir).unwrap();
    let failure = cold_store
        .successor_chain(&rotation.predecessor.id)
        .unwrap_err();
    let reported_id = match failure {
        KeyError::NotFound(id) => id,
        other => panic!("expected NotFound error, got {other:?}"),
    };
    assert_eq!(reported_id, rotation.successor.id);

    cleanup(tmp_dir);
}
2277
/// Entry files written before 0.9.5 carry neither `valid_until` nor
/// `successor_key_id`. Such files must still deserialize, show up in
/// `list()`, and back `default_signer()`.
#[test]
fn legacy_entry_without_lifecycle_fields_loads() {
    let (keystore, tmp_dir) = make_store();
    let generated = keystore.generate(true).unwrap();

    // Rewrite the entry file with the lifecycle fields stripped, the
    // way a 0.9.4-or-earlier CLI would have written it.
    let entry_file = keystore.entry_path(&generated.id);
    let mut doc: serde_json::Value =
        serde_json::from_slice(&fs::read(&entry_file).unwrap()).unwrap();
    let fields = doc.as_object_mut().unwrap();
    for lifecycle_field in ["valid_until", "successor_key_id"] {
        fields.remove(lifecycle_field);
    }
    fs::write(&entry_file, serde_json::to_vec_pretty(&doc).unwrap()).unwrap();

    // Cold-cache Store: the entry must load with both fields as None.
    let cold_store = Store::open(&tmp_dir).unwrap();
    let all_keys = cold_store.list().unwrap();
    assert_eq!(all_keys.len(), 1);
    let only = &all_keys[0];
    assert!(
        only.valid_until.is_none(),
        "missing valid_until must default to None on legacy entry"
    );
    assert!(
        only.successor_key_id.is_none(),
        "missing successor_key_id must default to None on legacy entry"
    );
    let default_signer = cold_store.default_signer().unwrap();
    assert_eq!(default_signer.key_id(), generated.id);

    cleanup(tmp_dir);
}
2310
2311    // --- keystore permission hardening (PR 1) -------------------------------
2312
// Several permission tests below set or clear the process-global
// environment variable TREESHIP_ALLOW_INSECURE_KEY_PERMS. `cargo test`
// runs tests in parallel by default, so without serialization one test
// could enable the bypass while another expects it to be unset, and
// that test would fail because of the race. Tests that touch the
// variable hold this mutex for their whole body; every other test in
// the file stays parallel-safe without it.
static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
2320
/// The entry file produced by `generate` must have permission bits of
/// exactly 0600.
#[test]
#[cfg(unix)]
fn write_entry_creates_file_with_0600() {
    use std::os::unix::fs::PermissionsExt;
    let (keystore, tmp_dir) = make_store();
    let generated = keystore.generate(true).unwrap();

    let entry_file = keystore.entry_path(&generated.id);
    let permissions = fs::metadata(entry_file).unwrap().permissions();
    // Mask off the file-type bits; only the rwx triplets matter here.
    let mode = permissions.mode() & 0o777;
    assert_eq!(mode, 0o600, "freshly written key file must be 0600, got {:o}", mode);
    cleanup(tmp_dir);
}
2335
/// With the bypass variable unset, `signer` must refuse a key file
/// whose mode has been loosened to 0644 and report the offending path
/// and mode.
#[test]
#[cfg(unix)]
fn signer_refuses_world_readable_key() {
    use std::os::unix::fs::PermissionsExt;
    // Hold the env lock so no sibling test flips the bypass variable
    // while this one runs (cargo's test runner is parallel).
    let _env_guard = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
    // The host environment may have the bypass set; clear it.
    std::env::remove_var("TREESHIP_ALLOW_INSECURE_KEY_PERMS");

    let (keystore, tmp_dir) = make_store();
    let generated = keystore.generate(true).unwrap();

    // Loosen the key file's mode, as a checkout, scp, or shared-volume
    // mishap might.
    let key_file = keystore.entry_path(&generated.id);
    let loose = fs::Permissions::from_mode(0o644);
    fs::set_permissions(&key_file, loose).unwrap();

    let outcome = keystore.signer(&generated.id);
    match outcome {
        Err(KeyError::InsecureKeyPerms { path: reported_path, mode: reported_mode }) => {
            assert_eq!(reported_path, key_file);
            assert_eq!(reported_mode & 0o777, 0o644);
        }
        other => panic!("expected InsecureKeyPerms, got {:?}", other.map(|_| "ok")),
    }
    cleanup(tmp_dir);
}
2363
/// Setting TREESHIP_ALLOW_INSECURE_KEY_PERMS must let `signer` load a
/// key file whose mode is 0644.
#[test]
#[cfg(unix)]
fn signer_bypass_via_env_var() {
    use std::os::unix::fs::PermissionsExt;
    let _env_guard = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
    let (keystore, tmp_dir) = make_store();
    let generated = keystore.generate(true).unwrap();
    let key_file = keystore.entry_path(&generated.id);
    let loose = fs::Permissions::from_mode(0o644);
    fs::set_permissions(&key_file, loose).unwrap();

    // Keep the bypass set only for the single call under test, and
    // clear it before asserting so it is gone even if the assert fails.
    std::env::set_var("TREESHIP_ALLOW_INSECURE_KEY_PERMS", "1");
    let outcome = keystore.signer(&generated.id);
    std::env::remove_var("TREESHIP_ALLOW_INSECURE_KEY_PERMS");

    assert!(
        outcome.is_ok(),
        "bypass env var must allow signing: {:?}",
        outcome.err()
    );
    cleanup(tmp_dir);
}
2385
2386    // --- v0.10.4 P2: TOCTOU window in signer() perm-check ---------------
2387
/// Pins the single-open shape of `signer()`'s permission gate.
///
/// The earlier shape was `check_key_file_perms(path)` followed by
/// `load_entry(id) -> fs::read(path)`: two separate opens, with a
/// window between them in which the file could be swapped. The fix
/// routes through `read_entry_with_perm_check`, which opens once and
/// fstat's the resulting fd.
///
/// A unit test cannot reliably race the filesystem, so this test
/// stages the file an attacker would swap in -- valid envelope bytes
/// under loose permissions -- and checks that both `signer()` and the
/// helper reject it with `InsecureKeyPerms`. The two-open code would
/// also have rejected this static file; what the test documents is
/// that the rejection comes from the gate on the opened file. Whether
/// a second open has crept back in can only be verified by reading the
/// code.
#[test]
#[cfg(unix)]
fn signer_rejects_post_check_swap() {
    use std::os::unix::fs::PermissionsExt;
    let _env_guard = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
    std::env::remove_var("TREESHIP_ALLOW_INSECURE_KEY_PERMS");

    let (keystore, tmp_dir) = make_store();
    let generated = keystore.generate(true).unwrap();
    let key_file = keystore.entry_path(&generated.id);

    // Keep the legitimate entry bytes so the staged file has content
    // that would parse and decrypt if the gate let it through.
    let envelope = fs::read(&key_file).unwrap();
    assert!(!envelope.is_empty(), "test sanity");

    // Stage the swapped-in file: identical content, mode 0644.
    fs::write(&key_file, &envelope).unwrap();
    let loose = fs::Permissions::from_mode(0o644);
    fs::set_permissions(&key_file, loose).unwrap();

    let outcome = keystore.signer(&generated.id);
    match outcome {
        Ok(_) => panic!(
            "expected InsecureKeyPerms from single-open fstat gate, got ok signer"
        ),
        Err(KeyError::InsecureKeyPerms { path: reported_path, mode: reported_mode }) => {
            assert_eq!(reported_path, key_file);
            assert_eq!(reported_mode & 0o777, 0o644);
        }
        Err(other) => panic!(
            "expected InsecureKeyPerms from single-open fstat gate, got {:?}",
            other
        ),
    }

    // Call the helper directly as well: it must fail with
    // InsecureKeyPerms and never hand back an entry. A refactor that
    // reintroduced a path-based read after the gate would still pass
    // this check, so code review remains the real guard.
    let via_helper = keystore.read_entry_with_perm_check(&generated.id);
    assert!(
        matches!(via_helper, Err(KeyError::InsecureKeyPerms { .. })),
        "read_entry_with_perm_check must reject before reading bytes; got {:?}",
        via_helper.map(|_| "ok")
    );

    cleanup(tmp_dir);
}
2462
2463    // --- TS-2026-001 H3 migration-lock concurrency test -----------------
2464
/// H3: two threads calling `Store::signer` on the same legacy v1 entry
/// must both succeed, the on-disk entry must end up as a valid v2 entry
/// (decryptable via the v2 path), and no `.tmp` fragment may be left in
/// the keystore directory.
///
/// Without the advisory lock around `migrate_entry_to_v2`, two
/// concurrent migrators would race the read-modify-rename cycle: the
/// loser's rename would clobber the winner's v2 entry with its own
/// (also-valid) v2 entry, and a third reader could decrypt the first
/// v2 entry only to have its in-memory state invalidated by the second
/// writer. The flock turns the race into a queue: only one rename per
/// entry is needed, and the second writer's post-lock recheck observes
/// the v2 state and exits cleanly.
#[test]
fn concurrent_migration_serializes_correctly() {
    use std::thread;

    // Build a legacy v1 entry on disk: generate a normal (v2) key,
    // recover its secret, and re-encrypt it with the legacy scheme.
    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();
    let entry_path = store.entry_path(&info.id);

    let v2_entry: EncryptedEntry =
        serde_json::from_slice(&fs::read(&entry_path).unwrap()).unwrap();
    let secret = decrypt_from_disk(
        &store.machine_key,
        &v2_entry.id,
        &v2_entry.public_key,
        &v2_entry.enc_priv_key,
        &v2_entry.nonce,
    )
    .unwrap();
    let (legacy_blob, legacy_nonce) =
        legacy_v1_encrypt(&store.machine_key, &secret).unwrap();
    let legacy_entry = EncryptedEntry {
        id:               v2_entry.id.clone(),
        algorithm:        v2_entry.algorithm.clone(),
        created_at:       v2_entry.created_at.clone(),
        public_key:       v2_entry.public_key.clone(),
        enc_priv_key:     legacy_blob,
        nonce:            legacy_nonce,
        valid_until:      v2_entry.valid_until.clone(),
        successor_key_id: v2_entry.successor_key_id.clone(),
    };
    fs::write(&entry_path, serde_json::to_vec_pretty(&legacy_entry).unwrap()).unwrap();

    // Each thread opens its OWN Store on the shared directory so the
    // on-disk lock is what serializes them (a shared Store would
    // serialize through its internal RwLock cache and test the wrong
    // thing). Each thread owns a plain clone of the directory and id;
    // nothing is shared between them, so no Arc is needed.
    let dir_a = dir.clone();
    let dir_b = dir.clone();
    let id_a = info.id.clone();
    let id_b = info.id.clone();

    let h1 = thread::spawn(move || -> Result<(), String> {
        let s = Store::open(&dir_a).map_err(|e| e.to_string())?;
        let _signer = s.signer(&id_a).map_err(|e| e.to_string())?;
        Ok(())
    });
    let h2 = thread::spawn(move || -> Result<(), String> {
        let s = Store::open(&dir_b).map_err(|e| e.to_string())?;
        let _signer = s.signer(&id_b).map_err(|e| e.to_string())?;
        Ok(())
    });

    h1.join().unwrap().expect("thread 1 signer load must succeed");
    h2.join().unwrap().expect("thread 2 signer load must succeed");

    // Post-condition: on-disk entry is v2 framed.
    let after: EncryptedEntry =
        serde_json::from_slice(&fs::read(&entry_path).unwrap()).unwrap();
    assert!(
        !is_legacy_v1(&after.enc_priv_key),
        "post-concurrent-migration entry must be in v2 format"
    );
    assert_eq!(after.enc_priv_key[0], KEYSTORE_MAGIC);
    assert_eq!(after.enc_priv_key[1], KEYSTORE_VERSION_V2);

    // v2 decrypts cleanly. Use the post-migration entry's own id +
    // pubkey: the migration must have re-encrypted with those bound
    // into the AAD, or this would surface a MAC failure.
    let dec = decrypt_v2(
        &store.machine_key,
        &after.id,
        &after.public_key,
        &after.enc_priv_key,
    )
    .expect("v2 entry must decrypt cleanly after concurrent migration");
    assert_eq!(dec.len(), 32, "decrypted secret must be a 32-byte ed25519 scalar");

    // No stale .tmp file left behind.
    for entry in fs::read_dir(&dir).unwrap() {
        let p = entry.unwrap().path();
        assert!(
            p.extension().is_none_or(|e| e != "tmp"),
            "no .tmp fragment must remain after migration, found: {}",
            p.display()
        );
    }

    cleanup(dir);
}
2574
2575    // --- TS-2026-001 H1 + H2 atomic write tests ------------------------
2576
/// H1: a `write_file_600` call that fails partway MUST leave the
/// existing on-disk entry byte-identical and still decryptable, and
/// must not leave a `.tmp` file behind.
///
/// The failure is forced on Unix by making the keystore directory
/// read+execute only (0o500) once the original key is in place, then
/// attempting a fresh write to the same entry path.
///
/// NOTE(review): the `.tmp` check at the end suggests the temp file is
/// staged next to the entry. If so, a 0o500 directory probably makes
/// the write fail while creating the temp file, before any rename is
/// attempted -- confirm whether the rename-failure branch is really
/// what this exercises.
///
/// NOTE(review): directory write bits do not restrain a privileged
/// user, so this test presumably assumes it is not run as root --
/// confirm for CI.
#[test]
#[cfg(unix)]
fn atomic_write_leaves_original_intact_on_partial_failure() {
    use std::os::unix::fs::PermissionsExt;
    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();
    let entry_path = store.entry_path(&info.id);

    // Capture the original bytes for byte-identity comparison.
    let original = fs::read(&entry_path).expect("entry file must exist");
    assert!(!original.is_empty(), "freshly generated entry must be non-empty");

    // Lock the directory: read+execute only, no write.
    let orig_dir_mode = fs::metadata(&dir).unwrap().permissions().mode() & 0o777;
    fs::set_permissions(&dir, fs::Permissions::from_mode(0o500)).unwrap();

    // Attempt a fresh write to the SAME path while the directory is
    // locked.
    let res = write_file_600(&entry_path, b"new junk that must not land");

    // Restore the directory mode BEFORE asserting anything. If an
    // assertion panicked while the directory was still 0o500, the temp
    // directory would be left behind in a state that cannot be cleaned
    // up without a manual chmod.
    fs::set_permissions(&dir, fs::Permissions::from_mode(orig_dir_mode)).unwrap();

    assert!(res.is_err(), "write_file_600 must fail when dir is read-only");

    // The original key file must be byte-identical to what we
    // captured before the failed write.
    let after = fs::read(&entry_path).expect("entry file must still exist after failed write");
    assert_eq!(
        after, original,
        "failed atomic write must not corrupt the original file",
    );

    // And the keystore must still produce a working signer from it.
    let store2 = Store::open(&dir).unwrap();
    let signer = store2
        .signer(&info.id)
        .expect("original key must still decrypt after a failed write");
    let pae = crate::attestation::pae("text/plain", b"survive");
    assert_eq!(signer.sign(&pae).unwrap().len(), 64);

    // No stale tmp file left behind.
    let tmp = entry_path.with_extension("tmp");
    assert!(!tmp.exists(), "tmp file must be cleaned up after rename failure");

    cleanup(dir);
}
2637
/// H2: the entry file is created with mode 0o600 via
/// `OpenOptionsExt::mode`, not by a post-write `set_permissions` (which
/// left a tiny window of looser permissions). Also checks that no tmp
/// file survives the write.
#[test]
#[cfg(unix)]
fn mode_is_600_at_creation() {
    use std::os::unix::fs::PermissionsExt;
    let (keystore, tmp_dir) = make_store();
    let generated = keystore.generate(true).unwrap();
    let entry_file = keystore.entry_path(&generated.id);

    let permissions = fs::metadata(&entry_file).unwrap().permissions();
    let mode = permissions.mode() & 0o777;
    assert_eq!(mode, 0o600, "entry file must be 0600 at creation, got {:o}", mode);

    let staging_file = entry_file.with_extension("tmp");
    let staging_left_behind = staging_file.exists();
    assert!(
        !staging_left_behind,
        "no .tmp file must be left behind after a successful atomic write"
    );

    cleanup(tmp_dir);
}
2661
/// `fix_perms` must report and repair a keystore directory loosened to
/// 0755 and a key file loosened to 0644, after which signing works.
#[test]
#[cfg(unix)]
fn fix_perms_repairs_loose_modes() {
    use std::os::unix::fs::PermissionsExt;
    let (store, dir) = make_store();
    let generated = store.generate(true).unwrap();
    let key_path = store.entry_path(&generated.id);

    // Loosen both the directory and the key file.
    fs::set_permissions(&dir, fs::Permissions::from_mode(0o755)).unwrap();
    fs::set_permissions(&key_path, fs::Permissions::from_mode(0o644)).unwrap();

    let changes = store.fix_perms().unwrap();
    // The manifest may or may not appear in the report (it may already
    // be 0600, depending on how it was written), so only the two paths
    // loosened above are asserted.
    let dir_reported = changes.iter().any(|(p, _, _)| p == &dir);
    assert!(dir_reported, "dir should be repaired");
    let key_reported = changes.iter().any(|(p, _, _)| p == &key_path);
    assert!(key_reported, "key file should be repaired");

    // Both paths must now have the tight modes on disk.
    let mode_bits = |meta: fs::Metadata| meta.permissions().mode() & 0o777;
    let dir_mode = mode_bits(fs::metadata(&dir).unwrap());
    let key_mode = mode_bits(fs::metadata(&key_path).unwrap());
    assert_eq!(dir_mode, 0o700);
    assert_eq!(key_mode, 0o600);

    // After repair, signing must work again.
    store
        .signer(&generated.id)
        .expect("signing must work after fix_perms");

    cleanup(dir);
}
2695
2696    // --- TS-2026-001 post-merge fix-up: entry-binding AAD ------------------
2697
/// Post-merge audit fix: the v2 AAD binds the entry id and public key
/// into the GCM tag. Without that binding, a local attacker with write
/// access to ~/.treeship/keys/ could copy entry B's `enc_priv_key`
/// ciphertext into entry A's JSON envelope; decryption would succeed
/// (same machine key, framing-only AAD) and the signer advertised as
/// key A would silently sign with key B's secret scalar.
///
/// This test performs that swap and asserts decryption fails with a
/// MAC error. Before the fix the tampered entry would have decrypted
/// without error to the wrong scalar.
#[test]
fn cross_entry_swap_fails_decryption() {
    let (keystore, tmp_dir) = make_store();

    // Two independent keys in one store, under the same machine key.
    let a = keystore.generate(true).unwrap();
    let b = keystore.generate(false).unwrap();

    // Read both on-disk envelopes.
    let path_a = keystore.entry_path(&a.id);
    let path_b = keystore.entry_path(&b.id);
    let read_envelope = |p: &_| -> EncryptedEntry {
        serde_json::from_slice(&fs::read(p).unwrap()).unwrap()
    };
    let entry_a = read_envelope(&path_a);
    let entry_b = read_envelope(&path_b);

    // Sanity: both are v2 framed, and the ciphertexts differ.
    for envelope in [&entry_a, &entry_b] {
        assert_eq!(envelope.enc_priv_key[0], KEYSTORE_MAGIC);
        assert_eq!(envelope.enc_priv_key[1], KEYSTORE_VERSION_V2);
    }
    assert_ne!(
        entry_a.enc_priv_key, entry_b.enc_priv_key,
        "two freshly-generated entries must have distinct ciphertexts"
    );

    // The attack: A's envelope (id, public_key, algorithm untouched)
    // carrying B's enc_priv_key. The v2 nonce is inline in the blob
    // (bytes [2..14]), so it moves with the ciphertext; the separate
    // JSON `nonce` field is empty for v2 entries either way.
    let tampered_a = EncryptedEntry {
        enc_priv_key: entry_b.enc_priv_key.clone(),
        ..entry_a.clone()
    };
    fs::write(&path_a, serde_json::to_vec_pretty(&tampered_a).unwrap()).unwrap();

    // Use a new Store so no cached state hides the on-disk tamper.
    let cold_store = Store::open(&tmp_dir).unwrap();
    let failure = match cold_store.signer(&a.id) {
        Err(e) => e,
        Ok(_) => panic!(
            "swapping B's ciphertext into A's envelope must fail decrypt; \
             got Ok which means the signer would silently sign with key B"
        ),
    };

    // It has to be a crypto/MAC failure specifically. A NotFound or
    // InsecureKeyPerms error here could mask this class of bug.
    let msg = match failure {
        KeyError::Crypto(msg) => msg,
        other => panic!("expected Crypto MAC error, got: {other:?}"),
    };
    assert!(
        msg.contains("MAC verification failed"),
        "swap must surface MAC failure; got: {msg}"
    );

    cleanup(tmp_dir);
}
2771
2772    /// Companion to `cross_entry_swap_fails_decryption`: the id field
2773    /// is also bound into the AAD, so editing the JSON `id` while
2774    /// leaving the ciphertext alone must also fail. (An attacker who
2775    /// renames a stolen entry file onto a victim's id without
2776    /// re-encrypting would land here.)
2777    #[test]
2778    fn aad_tampered_entry_id_fails_decryption() {
2779        let (store, dir) = make_store();
2780        let info = store.generate(true).unwrap();
2781        let path = store.entry_path(&info.id);
2782
2783        let mut entry: EncryptedEntry =
2784            serde_json::from_slice(&fs::read(&path).unwrap()).unwrap();
2785        assert_eq!(entry.id, info.id, "sanity: id matches what generate returned");
2786
2787        // Pretend the attacker forged an id. Note we write this back to
2788        // the SAME file path so Store::load_entry by the original id
2789        // finds it; if we changed the path too we'd just be testing
2790        // NotFound, which isn't the point.
2791        entry.id = "key_attacker_substituted_id".to_string();
2792        fs::write(&path, serde_json::to_vec_pretty(&entry).unwrap()).unwrap();
2793
2794        // Fresh Store so cache doesn't paper this over. Load via the
2795        // tampered id (matching what's in the JSON) so we exercise the
2796        // decrypt path rather than a path-vs-id mismatch.
2797        let store2 = Store::open(&dir).unwrap();
2798        // Drop the cache by opening fresh; load by the on-disk id.
2799        // The entry_path for "key_attacker_substituted_id" doesn't
2800        // exist, so we deliberately call the lower-level read by
2801        // path-of-original and assert decrypt fails via the dispatcher.
2802        // Easiest: bypass entry_path and invoke decrypt_from_disk with
2803        // the tampered id directly.
2804        let key_buf = store2.machine_key;
2805        let result = decrypt_from_disk(
2806            &key_buf,
2807            &entry.id,          // tampered id (bound into AAD)
2808            &entry.public_key,  // original pubkey
2809            &entry.enc_priv_key,
2810            &entry.nonce,
2811        );
2812        assert!(
2813            result.is_err(),
2814            "AAD-bound entry id mismatch must fail decrypt; got Ok"
2815        );
2816
2817        cleanup(dir);
2818    }
2819}