treeship_core/keys/mod.rs
1use std::{
2 collections::HashMap,
3 fs,
4 io::{self, Read, Write},
5 path::{Path, PathBuf},
6 sync::{Arc, RwLock},
7};
8
9use aes_gcm::{
10 aead::{Aead, KeyInit, OsRng as AeadOsRng, Payload},
11 AeadCore, Aes256Gcm, Key as AesKey, Nonce,
12};
13use rand::{rngs::OsRng, RngCore};
14use serde::{Deserialize, Serialize};
15use sha2::{Digest as Sha2Digest, Sha256};
16use zeroize::Zeroizing;
17
18use crate::attestation::{Ed25519Signer, Signer};
19
20// --- Public types ---
21
/// Identifier of a key in the keystore, carried as a plain `String`.
pub type KeyId = String;
23
24/// Public information about a stored key. Never contains private material.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct KeyInfo {
27 pub id: KeyId,
28 pub algorithm: String, // "ed25519"
29 pub is_default: bool,
30 pub created_at: String, // RFC 3339
31 /// First 8 bytes of sha256(public_key), hex-encoded.
32 pub fingerprint: String,
33 pub public_key: Vec<u8>, // raw 32-byte Ed25519 public key
34 /// RFC 3339 timestamp after which signatures by this key should be
35 /// considered stale. `None` means the key has not been rotated and is
36 /// indefinitely valid. Set automatically by `Store::rotate` to
37 /// `now + grace_period` on the predecessor key.
38 #[serde(default, skip_serializing_if = "Option::is_none")]
39 pub valid_until: Option<String>,
40 /// If this key was rotated to a successor, the successor's key id.
41 /// Lets verifiers walk a rotation chain forward when validating an old
42 /// receipt against the current keystore. `None` means this is the head
43 /// of its chain.
44 #[serde(default, skip_serializing_if = "Option::is_none")]
45 pub successor_key_id: Option<KeyId>,
46}
47
48/// Outcome of a `Store::rotate` call.
49#[derive(Debug, Clone)]
50pub struct RotationResult {
51 /// The key that was rotated. Its `valid_until` is now set.
52 pub predecessor: KeyInfo,
53 /// The freshly minted successor key.
54 pub successor: KeyInfo,
55 /// RFC 3339 timestamp until which the predecessor remains valid for
56 /// signature verification under the grace period. Equal to
57 /// `predecessor.valid_until.unwrap()`.
58 pub grace_period_until: String,
59}
60
61/// Errors from keystore operations.
62#[derive(Debug)]
63pub enum KeyError {
64 Io(io::Error),
65 Json(serde_json::Error),
66 Crypto(String),
67 NotFound(KeyId),
68 EmptyKeyId,
69 NoDefaultKey,
70 /// Private key file has insecure permissions (group- or world-readable).
71 /// Carries the path and the observed octal mode so the caller can show
72 /// an actionable error. Set `TREESHIP_ALLOW_INSECURE_KEY_PERMS=1` to
73 /// bypass during testing or controlled environments.
74 InsecureKeyPerms { path: PathBuf, mode: u32 },
75}
76
77impl std::fmt::Display for KeyError {
78 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79 match self {
80 Self::Io(e) => write!(f, "keys io: {}", e),
81 Self::Json(e) => write!(f, "keys json: {}", e),
82 Self::Crypto(e) => write!(f, "keys crypto: {}", e),
83 Self::NotFound(k) => write!(f, "key not found: {}", k),
84 Self::EmptyKeyId => write!(f, "key id must not be empty"),
85 Self::NoDefaultKey => write!(f, "no default key — run treeship init"),
86 Self::InsecureKeyPerms { path, mode } => write!(
87 f,
88 "private key {} has insecure permissions (mode {:o}); \
89 run `treeship doctor --fix` or chmod 600 the file. \
90 Set TREESHIP_ALLOW_INSECURE_KEY_PERMS=1 to bypass.",
91 path.display(),
92 mode & 0o777,
93 ),
94 }
95 }
96}
97
98impl std::error::Error for KeyError {}
99impl From<io::Error> for KeyError { fn from(e: io::Error) -> Self { Self::Io(e) } }
100impl From<serde_json::Error> for KeyError { fn from(e: serde_json::Error) -> Self { Self::Json(e) } }
101
102// --- On-disk formats ---
103
104/// The encrypted representation of one keypair on disk.
105#[derive(Serialize, Deserialize, Clone)]
106struct EncryptedEntry {
107 id: KeyId,
108 algorithm: String,
109 created_at: String,
110 public_key: Vec<u8>,
111 /// AES-256-GCM ciphertext of the 32-byte Ed25519 secret scalar.
112 enc_priv_key: Vec<u8>,
113 /// 12-byte GCM nonce used when encrypting.
114 nonce: Vec<u8>,
115 /// RFC 3339 timestamp after which signatures by this key should be
116 /// considered stale. `None` means the key is indefinitely valid.
117 /// Defaulted on deserialization so pre-0.9.5 entry files still load.
118 #[serde(default, skip_serializing_if = "Option::is_none")]
119 valid_until: Option<String>,
120 /// Successor key id if this key was rotated. Defaulted on
121 /// deserialization for pre-0.9.5 entry files.
122 #[serde(default, skip_serializing_if = "Option::is_none")]
123 successor_key_id: Option<KeyId>,
124}
125
126/// The manifest file: which keys exist and which is the default.
127#[derive(Serialize, Deserialize, Default)]
128struct Manifest {
129 default_key_id: Option<KeyId>,
130 key_ids: Vec<KeyId>,
131}
132
133// --- Store ---
134
135/// Local encrypted keystore.
136///
137/// Private keys are encrypted with AES-256-GCM (RustCrypto `aes-gcm`
138/// 0.10) before writing to disk. The encryption key is derived from a
139/// machine-specific secret so key files are useless if copied to
140/// another machine.
141///
142/// Pre-v0.10.3 keystores used a homemade SHA-256-CTR + HMAC-SHA-256
143/// construction (TS-2026-001) and are transparently migrated to the
144/// new AEAD format on first decrypt; see `encrypt_for_disk_v2` /
145/// `decrypt_from_disk` for the format dispatcher.
146///
147/// A future version will delegate to OS credential stores (Secure
148/// Enclave / TPM 2.0).
149pub struct Store {
150 dir: PathBuf,
151 machine_key: [u8; 32],
152 /// In-memory cache — avoids disk reads on hot paths.
153 cache: Arc<RwLock<HashMap<KeyId, EncryptedEntry>>>,
154}
155
156impl Store {
157 /// Opens or creates a keystore at `dir`.
158 pub fn open(dir: impl AsRef<Path>) -> Result<Self, KeyError> {
159 let dir = dir.as_ref().to_path_buf();
160 fs::create_dir_all(&dir)?;
161
162 let machine_key = derive_machine_key(&dir)?;
163
164 Ok(Self {
165 dir,
166 machine_key,
167 cache: Arc::new(RwLock::new(HashMap::new())),
168 })
169 }
170
171 /// Generates a new Ed25519 keypair, encrypts and stores it.
172 /// If `set_default` is true (or there is no current default), makes
173 /// this key the default signing key.
174 pub fn generate(&self, set_default: bool) -> Result<KeyInfo, KeyError> {
175 let key_id = new_key_id();
176
177 let signer = Ed25519Signer::generate(&key_id)
178 .map_err(|e| KeyError::Crypto(e.to_string()))?;
179
180 // `secret` is a Zeroizing<[u8; 32]> -- the caller-side copy of the
181 // signer's secret scalar is wiped on scope exit. `signer` is dropped
182 // at end of fn, which wipes its own copy via the Drop impl in
183 // attestation::signer.
184 let secret = signer.secret_bytes();
185 let pub_key = signer.public_key_bytes();
186
187 let enc = encrypt_for_disk_v2(&self.machine_key, key_id.as_str(), &pub_key, secret.as_slice())
188 .map_err(KeyError::Crypto)?;
189
190 let entry = EncryptedEntry {
191 id: key_id.clone(),
192 algorithm: "ed25519".into(),
193 created_at: crate::statements::unix_to_rfc3339(unix_now()),
194 public_key: pub_key.clone(),
195 enc_priv_key: enc,
196 // v2 ciphertexts carry their nonce inline (bytes [2..14]).
197 // The separate `nonce` field is retained for v1 legacy
198 // compatibility; for fresh v2 entries we serialize an empty
199 // vec so the JSON stays well-formed.
200 nonce: Vec::new(),
201 valid_until: None,
202 successor_key_id: None,
203 };
204
205 self.write_entry(&entry)?;
206
207 // Update manifest.
208 let mut manifest = self.read_manifest()?;
209 manifest.key_ids.push(key_id.clone());
210 if set_default || manifest.default_key_id.is_none() {
211 manifest.default_key_id = Some(key_id.clone());
212 }
213 self.write_manifest(&manifest)?;
214
215 // Populate cache.
216 self.cache.write().unwrap().insert(key_id.clone(), entry);
217
218 Ok(KeyInfo {
219 id: key_id.clone(),
220 algorithm: "ed25519".into(),
221 is_default: manifest.default_key_id.as_deref() == Some(key_id.as_str()),
222 created_at: crate::statements::unix_to_rfc3339(unix_now()),
223 fingerprint: fingerprint(&pub_key),
224 public_key: pub_key,
225 valid_until: None,
226 successor_key_id: None,
227 })
228 }
229
230 /// Rotate the current default key (or a specific key) to a freshly
231 /// generated successor.
232 ///
233 /// Mints a new Ed25519 keypair, links the predecessor to it via
234 /// `successor_key_id`, and stamps the predecessor with a `valid_until`
235 /// of `now + grace_period`. The grace window lets verifiers continue to
236 /// accept signatures from the predecessor while clients catch up to
237 /// the new public key.
238 ///
239 /// If `set_default` is true (the typical case -- you rotate because you
240 /// want to start signing with the new key immediately), the successor
241 /// becomes the default. Pass `false` to stage a rotation for review
242 /// without flipping the active signer.
243 ///
244 /// `predecessor_id` may be `None` to rotate the current default. Pass
245 /// an explicit id to rotate a non-default key (e.g. a per-environment
246 /// secondary).
247 ///
248 /// Note on threat model: this is a graceful rotation primitive, not a
249 /// revocation primitive. If the predecessor key is suspected compromised
250 /// the grace_period should be `Duration::ZERO` (or use a future
251 /// `revoke()` call once that lands) so the predecessor's `valid_until`
252 /// is in the past and any verifier honoring the metadata refuses
253 /// further signatures from it.
254 pub fn rotate(
255 &self,
256 predecessor_id: Option<&str>,
257 grace_period: std::time::Duration,
258 set_default: bool,
259 ) -> Result<RotationResult, KeyError> {
260 // Resolve predecessor: explicit id, else the current default.
261 let pred_id = match predecessor_id {
262 Some(id) => id.to_string(),
263 None => self.default_key_id()?,
264 };
265
266 // Refuse to rotate a key that has already been rotated -- the
267 // chain head is the only valid rotation source. This makes the
268 // operation idempotent in the face of accidental re-runs.
269 let pred_entry_existing = self.load_entry(&pred_id)?;
270 if let Some(existing) = &pred_entry_existing.successor_key_id {
271 return Err(KeyError::Crypto(format!(
272 "key {pred_id} has already been rotated to {existing}; \
273 rotate the chain head instead"
274 )));
275 }
276
277 // Mint the successor. We deliberately do NOT call `self.generate()`
278 // because that path also updates the manifest's default. We need a
279 // single transactional update that sets both predecessor metadata
280 // AND (optionally) the new default in one manifest write.
281 let succ_id = new_key_id();
282 let signer = Ed25519Signer::generate(&succ_id)
283 .map_err(|e| KeyError::Crypto(e.to_string()))?;
284 // `succ_secret` is a Zeroizing<[u8; 32]>; the caller-side copy is
285 // wiped on scope exit, and `signer` is dropped at end of fn (which
286 // wipes its own copy via the attestation::signer Drop impl).
287 let succ_secret = signer.secret_bytes();
288 let succ_pub_key = signer.public_key_bytes();
289 let succ_enc =
290 encrypt_for_disk_v2(&self.machine_key, succ_id.as_str(), &succ_pub_key, succ_secret.as_slice())
291 .map_err(KeyError::Crypto)?;
292
293 let succ_created = crate::statements::unix_to_rfc3339(unix_now());
294 let succ_entry = EncryptedEntry {
295 id: succ_id.clone(),
296 algorithm: "ed25519".into(),
297 created_at: succ_created.clone(),
298 public_key: succ_pub_key.clone(),
299 enc_priv_key: succ_enc,
300 // v2 ciphertexts carry their nonce inline; the legacy
301 // `nonce` field is left empty for fresh writes.
302 nonce: Vec::new(),
303 valid_until: None,
304 successor_key_id: None,
305 };
306
307 // Stamp the predecessor with the grace deadline and link forward.
308 let valid_until = crate::statements::unix_to_rfc3339(
309 unix_now() + grace_period.as_secs(),
310 );
311 let mut pred_entry = pred_entry_existing;
312 pred_entry.valid_until = Some(valid_until.clone());
313 pred_entry.successor_key_id = Some(succ_id.clone());
314
315 // Write order matters for partial-failure recovery. Persist the
316 // successor entry FIRST, then stamp the predecessor pointing at
317 // it. If we wrote the predecessor first and then the successor
318 // write failed, the predecessor's successor_key_id would dangle
319 // at a key that doesn't exist on disk -- and the
320 // already-been-rotated guard would refuse to retry. With this
321 // order:
322 // - successor write fails: nothing observable changed; retry clean.
323 // - predecessor write fails: orphan successor key file on disk
324 // (not yet referenced by manifest or by any other key); retry
325 // generates a new successor and the orphan is harmless.
326 // - manifest write fails: predecessor + successor both on disk,
327 // manifest stale; retry's already-rotated guard catches the
328 // half-finished state and surfaces a clear error.
329 self.write_entry(&succ_entry)?;
330 self.write_entry(&pred_entry)?;
331
332 // Refresh the cache to mirror the on-disk state we just wrote --
333 // BEFORE the manifest update. If the manifest write fails, the
334 // cache must still match disk so a same-process retry sees the
335 // half-rotated state and the already-rotated guard fires
336 // correctly. Doing this AFTER write_manifest would leave a
337 // window where disk reflects the rotation but the in-memory
338 // cache still serves the unstamped predecessor, and a retry
339 // from the same Store instance would generate a duplicate
340 // successor -- defeating the whole point of the guard.
341 {
342 let mut cache = self.cache.write().unwrap();
343 cache.insert(pred_entry.id.clone(), pred_entry.clone());
344 cache.insert(succ_id.clone(), succ_entry.clone());
345 }
346
347 // Update the manifest: register the new key, optionally promote it.
348 let mut manifest = self.read_manifest()?;
349 manifest.key_ids.push(succ_id.clone());
350 if set_default {
351 manifest.default_key_id = Some(succ_id.clone());
352 }
353 self.write_manifest(&manifest)?;
354
355 let default_id = manifest.default_key_id.clone();
356 let predecessor = KeyInfo {
357 id: pred_entry.id.clone(),
358 algorithm: pred_entry.algorithm.clone(),
359 is_default: default_id.as_deref() == Some(pred_entry.id.as_str()),
360 created_at: pred_entry.created_at.clone(),
361 fingerprint: fingerprint(&pred_entry.public_key),
362 public_key: pred_entry.public_key.clone(),
363 valid_until: pred_entry.valid_until.clone(),
364 successor_key_id: pred_entry.successor_key_id.clone(),
365 };
366 let successor = KeyInfo {
367 id: succ_id.clone(),
368 algorithm: "ed25519".into(),
369 is_default: default_id.as_deref() == Some(succ_id.as_str()),
370 created_at: succ_created,
371 fingerprint: fingerprint(&succ_pub_key),
372 public_key: succ_pub_key,
373 valid_until: None,
374 successor_key_id: None,
375 };
376
377 Ok(RotationResult {
378 predecessor,
379 successor,
380 grace_period_until: valid_until,
381 })
382 }
383
384 /// Walk the rotation chain forward from `id`, returning the ordered
385 /// list of key ids: `[id, successor_of_id, ...]`. The first element is
386 /// always `id` itself. Stops at a key with no `successor_key_id`.
387 pub fn successor_chain(&self, id: &str) -> Result<Vec<KeyId>, KeyError> {
388 let mut chain = Vec::new();
389 let mut cursor = id.to_string();
390 // Cap iterations at the manifest size to defend against a corrupt
391 // chain that loops back on itself. A well-formed chain is bounded
392 // by the number of keys in the keystore.
393 let max_steps = self.read_manifest()?.key_ids.len() + 1;
394 for _ in 0..max_steps {
395 chain.push(cursor.clone());
396 let entry = self.load_entry(&cursor)?;
397 match entry.successor_key_id {
398 Some(next) => cursor = next,
399 None => return Ok(chain),
400 }
401 }
402 Err(KeyError::Crypto(format!(
403 "rotation chain starting at {id} exceeds keystore size; suspected loop"
404 )))
405 }
406
407 /// Returns the `KeyInfo` for every key whose `valid_until` is either
408 /// unset or strictly after `at_unix_secs`. The result includes both
409 /// rotated-but-still-in-grace predecessors and never-rotated keys.
410 /// Useful for building a verifier's accept-set as of a given time.
411 pub fn valid_keys_at(&self, at_unix_secs: u64) -> Result<Vec<KeyInfo>, KeyError> {
412 let cutoff_rfc = crate::statements::unix_to_rfc3339(at_unix_secs);
413 Ok(self.list()?
414 .into_iter()
415 .filter(|k| match &k.valid_until {
416 None => true,
417 Some(until) => until.as_str() > cutoff_rfc.as_str(),
418 })
419 .collect())
420 }
421
422 /// Returns a boxed `Signer` for the current default key.
423 pub fn default_signer(&self) -> Result<Box<dyn Signer>, KeyError> {
424 let manifest = self.read_manifest()?;
425 let id = manifest.default_key_id.ok_or(KeyError::NoDefaultKey)?;
426 self.signer(&id)
427 }
428
429 /// Returns a boxed `Signer` for a specific key ID.
430 ///
431 /// Refuses to load if the on-disk key file has insecure permissions
432 /// (any group or world bits). This is the choke point for *all*
433 /// signing — public-key reads and successor lookups go through
434 /// `read_entry` / `public_key` and are not affected.
435 ///
436 /// Bypass with `TREESHIP_ALLOW_INSECURE_KEY_PERMS=1` for controlled
437 /// environments (CI sandboxes, recovery flows). The bypass should
438 /// not be set in normal operation.
439 ///
440 /// TOCTOU note: the perm-check and the ciphertext read run against
441 /// the SAME file descriptor (open once, fstat, then read from that
442 /// fd). The previous shape — `check_key_file_perms(path)` followed
443 /// by `load_entry(id)` (which called `fs::read(path)`) — opened the
444 /// file twice. An attacker with write access to `~/.treeship/keys/`
445 /// could swap the file between the two opens: first present an
446 /// owner-only file to pass the perm gate, then replace it with a
447 /// different (loose-perm) file containing an attacker-controlled
448 /// scalar before the second `open`. The single-fd shape closes that
449 /// window because the inode is pinned by the open file descriptor;
450 /// path-level swaps after the open don't affect what we read. This
451 /// matches the pattern in `session/event_log.rs::open_lock_file`.
452 pub fn signer(&self, id: &str) -> Result<Box<dyn Signer>, KeyError> {
453 let entry = self.read_entry_with_perm_check(id)?;
454
455 // Dispatcher: v2 ciphertexts start with magic 0x54, version 0x02
456 // and use real AES-256-GCM. Older entries fall through to the
457 // legacy SHA-256-CTR+HMAC path (`decrypt_legacy_v1`) and are
458 // transparently re-encrypted in the new format below.
459 let was_legacy = is_legacy_v1(&entry.enc_priv_key);
460 let secret = decrypt_from_disk(
461 &self.machine_key,
462 &entry.id,
463 &entry.public_key,
464 &entry.enc_priv_key,
465 &entry.nonce,
466 )
467 .map_err(|e| self.enrich_crypto_error(e))?;
468
469 // L3: wrap the on-stack copy of the decrypted secret in a
470 // `Zeroizing` so the byte buffer is wiped on drop. `secret`
471 // itself is already a `Zeroizing<Vec<u8>>` returned by
472 // `decrypt_from_disk`, but `try_into::<[u8; 32]>` produces an
473 // independent stack-allocated array that the Vec's Drop will
474 // not cover. Without this wrapper, returning from `signer()`
475 // would leave the secret scalar in stale stack memory until
476 // a future stack frame happens to overwrite it.
477 let secret_arr: Zeroizing<[u8; 32]> = Zeroizing::new(
478 secret.as_slice().try_into()
479 .map_err(|_| KeyError::Crypto("decrypted key is wrong length".into()))?
480 );
481
482 // Transparent migration: if this entry was still in the legacy
483 // v1 format (the broken SHA-256-CTR construction from
484 // TS-2026-001), re-encrypt it with v2 AES-256-GCM and rewrite
485 // the file. We do this best-effort -- a migration failure here
486 // must NOT block signing for the current call, since the
487 // in-memory secret is already valid. The next decrypt on a
488 // fresh process will retry.
489 if was_legacy {
490 if let Err(e) = self.migrate_entry_to_v2(&entry, &secret_arr) {
491 // Surface the failure as a tracing-style stderr note
492 // rather than an error -- the user's signing flow is
493 // unaffected, and we'd rather them know about it than
494 // wedge the call.
495 eprintln!(
496 "treeship: keystore entry {} could not be migrated \
497 from legacy v1 format to v2 ({}); will retry next \
498 load",
499 entry.id, e
500 );
501 }
502 }
503
504 let signer = Ed25519Signer::from_bytes(&entry.id, &secret_arr)
505 .map_err(|e| KeyError::Crypto(e.to_string()))?;
506
507 Ok(Box::new(signer))
508 }
509
510 /// Re-encrypt a legacy v1 entry with the new v2 AEAD and persist
511 /// it. Updates the in-memory cache so subsequent loads in the same
512 /// process see the migrated entry. Idempotent; safe to invoke
513 /// concurrently because the migration is serialized by a per-entry
514 /// advisory lock on `<entry>.migrate.lock` (TS-2026-001 H3).
515 ///
516 /// We lock a *sentinel* file rather than the entry file itself,
517 /// because the entry file is renamed-into-place during the atomic
518 /// write inside `write_entry`. Holding a flock on the entry's inode
519 /// while a sibling process renames a new inode into its path is
520 /// nonsensical (the lock would survive on the now-orphaned inode);
521 /// the sentinel sidecar has a stable identity for the whole
522 /// migration window.
523 ///
524 /// Same blocking-flock pattern as `packages/core/src/session/event_log.rs`
525 /// (Lane F): exclusive lock, then a same-thread re-read to settle
526 /// "did a peer already migrate while I was waiting?" cleanly.
527 fn migrate_entry_to_v2(
528 &self,
529 old_entry: &EncryptedEntry,
530 secret: &[u8; 32],
531 ) -> Result<(), KeyError> {
532 let entry_path = self.entry_path(&old_entry.id);
533 let lock_path = entry_path.with_extension("migrate.lock");
534
535 // Open (or create) the sentinel lock file with restrictive perms
536 // and take an exclusive flock. We intentionally use the blocking
537 // `lock_exclusive` -- not `try_lock_exclusive` -- because the
538 // migration window is short (a single AEAD encrypt + atomic
539 // rename) and the worst case under contention is one writer
540 // serialized behind another. Pulling the
541 // try-with-bounded-retry pattern in here would buy us nothing:
542 // the second writer's re-read after the lock releases would
543 // observe the now-v2 entry and short-circuit.
544 let lock_file = open_migration_lock_file(&lock_path)
545 .map_err(KeyError::Io)?;
546
547 #[cfg(not(target_family = "wasm"))]
548 {
549 use fs2::FileExt;
550 lock_file.lock_exclusive().map_err(KeyError::Io)?;
551 }
552
553 // Under the lock: did a peer already complete the migration
554 // while we were waiting? If so, our work is done -- we must
555 // NOT rewrite, because we'd overwrite a peer's freshly-rotated
556 // v2 ciphertext with our own (semantically equivalent, but
557 // unnecessary I/O and an unnecessary cache update).
558 if let Ok(current) = self.read_entry(&old_entry.id) {
559 if !is_legacy_v1(¤t.enc_priv_key) {
560 // Peer already migrated. Refresh the cache so subsequent
561 // loads in this process see the v2 entry rather than
562 // the stale legacy copy our caller passed in.
563 if let Ok(mut cache) = self.cache.write() {
564 cache.insert(current.id.clone(), current);
565 }
566 // Lock drops at function exit; sentinel file remains on
567 // disk as a harmless inode (no migration data, idempotent
568 // for future invocations).
569 return Ok(());
570 }
571 }
572
573 let new_ciphertext = encrypt_for_disk_v2(
574 &self.machine_key,
575 &old_entry.id,
576 &old_entry.public_key,
577 secret,
578 )
579 .map_err(KeyError::Crypto)?;
580
581 let migrated = EncryptedEntry {
582 id: old_entry.id.clone(),
583 algorithm: old_entry.algorithm.clone(),
584 created_at: old_entry.created_at.clone(),
585 public_key: old_entry.public_key.clone(),
586 enc_priv_key: new_ciphertext,
587 // v2 carries the nonce inline; clear the legacy field.
588 nonce: Vec::new(),
589 valid_until: old_entry.valid_until.clone(),
590 successor_key_id: old_entry.successor_key_id.clone(),
591 };
592
593 self.write_entry(&migrated)?;
594 if let Ok(mut cache) = self.cache.write() {
595 cache.insert(migrated.id.clone(), migrated);
596 }
597
598 // Best-effort cleanup of the sentinel lock file. We hold the
599 // lock until function exit (drop), so by the time we reach
600 // here it is safe to unlink the inode -- future migrations
601 // for this entry will succeed via the early-return path
602 // because the entry is now v2. Leaving the sentinel behind is
603 // also harmless; on Unix removing a flocked file is allowed
604 // and the lock is released on fd drop regardless.
605 let _ = std::fs::remove_file(&lock_path);
606
607 // Keep the lock_file binding alive to function exit so the
608 // flock is held across write_entry + remove_file. Explicit
609 // drop makes the intent obvious to readers.
610 drop(lock_file);
611 Ok(())
612 }
613
614 /// Wrap a bare crypto error (typically "MAC verification failed ..." from
615 /// the AES-GCM decrypt path) with a diagnostic and an actionable recovery
616 /// path.
617 ///
618 /// The common failure mode in the wild is a pre-0.9.x keystore whose
619 /// machine-key derivation was seed-file-based. Later versions derive
620 /// the machine key from hostname+username (macOS) or /etc/machine-id
621 /// (Linux), so old ciphertexts can't be MAC-verified with the new key.
622 /// Detecting that case is best-effort: the presence of a legacy seed
623 /// file (`.machineseed` or `machine_seed` inside the keys dir) is a
624 /// strong hint. If we see one, call it out explicitly.
625 fn enrich_crypto_error(&self, raw: String) -> KeyError {
626 // Only enrich on MAC failures -- other errors (I/O, wrong length) are
627 // surfaced as-is because their remediation differs.
628 if !raw.contains("MAC verification failed") {
629 return KeyError::Crypto(raw);
630 }
631
632 let legacy_seed_dot = self.dir.join(".machineseed");
633 let legacy_seed = self.dir.join("machine_seed");
634 let has_legacy_seed = legacy_seed_dot.exists() || legacy_seed.exists();
635
636 let diagnosis = if has_legacy_seed {
637 "your keystore was created by an older Treeship version whose \
638 machine-key derivation has since changed. The ciphertext is \
639 intact but cannot be decrypted under the current derivation."
640 } else {
641 "the keystore cannot be decrypted. Usual causes: the key file \
642 was copied from a different machine, the hostname or username \
643 changed, or the file was corrupted."
644 };
645
646 // Resolve the user's ~/.treeship path for the recovery command, so
647 // we give a copy-pasteable command rather than a generic instruction.
648 let ts_dir = std::env::var("HOME")
649 .map(|h| format!("{h}/.treeship"))
650 .unwrap_or_else(|_| "~/.treeship".into());
651
652 // The outer KeyError::Crypto Display impl already prepends
653 // "keys crypto: "; don't double it. Start with the raw MAC error
654 // so the user still sees the underlying cryptographic reason,
655 // then follow with the human-readable diagnosis and recovery.
656 let msg = format!(
657 "{raw}\n\n \
658 Diagnosis: {diagnosis}\n\n \
659 Recovery (nondestructive -- the old keystore is moved aside, \
660 not deleted; any sealed .treeship packages you produced remain \
661 verifiable since their receipts embed the old public key):\n\n \
662 mv {ts_dir} {ts_dir}.bak.$(date +%s)\n \
663 treeship init\n"
664 );
665
666 KeyError::Crypto(msg)
667 }
668
669 /// Returns the default key ID.
670 pub fn default_key_id(&self) -> Result<KeyId, KeyError> {
671 self.read_manifest()?
672 .default_key_id
673 .ok_or(KeyError::NoDefaultKey)
674 }
675
676 /// Lists all keys.
677 pub fn list(&self) -> Result<Vec<KeyInfo>, KeyError> {
678 let manifest = self.read_manifest()?;
679 let default = manifest.default_key_id.as_deref().unwrap_or("");
680
681 manifest.key_ids.iter().map(|id| {
682 let entry = self.load_entry(id)?;
683 Ok(KeyInfo {
684 id: entry.id.clone(),
685 algorithm: entry.algorithm.clone(),
686 is_default: entry.id == default,
687 created_at: entry.created_at.clone(),
688 fingerprint: fingerprint(&entry.public_key),
689 public_key: entry.public_key.clone(),
690 valid_until: entry.valid_until.clone(),
691 successor_key_id: entry.successor_key_id.clone(),
692 })
693 }).collect()
694 }
695
696 /// Sets the default signing key.
697 pub fn set_default(&self, id: &str) -> Result<(), KeyError> {
698 // Verify the key exists before updating the manifest.
699 self.load_entry(id)?;
700 let mut manifest = self.read_manifest()?;
701 manifest.default_key_id = Some(id.to_string());
702 self.write_manifest(&manifest)
703 }
704
705 /// Returns the public key bytes for a key ID.
706 pub fn public_key(&self, id: &str) -> Result<Vec<u8>, KeyError> {
707 Ok(self.load_entry(id)?.public_key)
708 }
709
710 // --- private ---
711
712 fn load_entry(&self, id: &str) -> Result<EncryptedEntry, KeyError> {
713 // Check cache first.
714 if let Ok(cache) = self.cache.read() {
715 if let Some(entry) = cache.get(id) {
716 return Ok(entry.clone());
717 }
718 }
719 self.read_entry(id)
720 }
721
722 fn entry_path(&self, id: &str) -> PathBuf {
723 self.dir.join(format!("{}.json", id))
724 }
725
726 fn write_entry(&self, entry: &EncryptedEntry) -> Result<(), KeyError> {
727 let path = self.entry_path(&entry.id);
728 let json = serde_json::to_vec_pretty(entry)?;
729 write_file_600(&path, &json)?;
730 Ok(())
731 }
732
733 fn read_entry(&self, id: &str) -> Result<EncryptedEntry, KeyError> {
734 let path = self.entry_path(id);
735 if !path.exists() {
736 return Err(KeyError::NotFound(id.to_string()));
737 }
738 let bytes = fs::read(&path)?;
739 let entry: EncryptedEntry = serde_json::from_slice(&bytes)?;
740 Ok(entry)
741 }
742
743 /// Single-open, race-free counterpart to `read_entry` for the
744 /// signing path. Opens the key file ONCE, fstat's the file
745 /// descriptor to check perms, then reads the JSON from the SAME
746 /// descriptor. The path is never re-resolved after the open, so an
747 /// attacker who swaps `<id>.json` on disk between the perm check
748 /// and the ciphertext read cannot influence the bytes we decrypt.
749 ///
750 /// Cache: this path intentionally skips the in-memory entry cache.
751 /// The cache is read-mostly and seeded by `load_entry`, which is
752 /// fine for public-key lookups but defeats the perm gate (a cached
753 /// entry would let `signer()` return without ever consulting the
754 /// on-disk perms). The signing path is rare enough that the extra
755 /// disk read is not a hot spot.
756 fn read_entry_with_perm_check(&self, id: &str) -> Result<EncryptedEntry, KeyError> {
757 let path = self.entry_path(id);
758
759 // Open once. NotFound surfaces as `KeyError::NotFound` to
760 // match the legacy `read_entry` shape; any other I/O error
761 // (permission denied at the *open* layer, EIO, etc.)
762 // propagates via the `From<io::Error>` impl.
763 let mut file = match fs::File::open(&path) {
764 Ok(f) => f,
765 Err(e) if e.kind() == io::ErrorKind::NotFound => {
766 return Err(KeyError::NotFound(id.to_string()));
767 }
768 Err(e) => return Err(KeyError::Io(e)),
769 };
770
771 // Perm check on the open fd. On Unix `File::metadata` is
772 // documented to call `fstat` on the underlying fd, which pins
773 // the inode -- a subsequent path swap on disk cannot change
774 // what we see. The bypass env var continues to short-circuit.
775 check_open_key_file_perms(&path, &file)?;
776
777 // Read the full ciphertext envelope from the same fd.
778 let mut bytes = Vec::new();
779 file.read_to_end(&mut bytes)?;
780
781 let entry: EncryptedEntry = serde_json::from_slice(&bytes)?;
782 Ok(entry)
783 }
784
785 fn manifest_path(&self) -> PathBuf {
786 self.dir.join("manifest.json")
787 }
788
789 fn read_manifest(&self) -> Result<Manifest, KeyError> {
790 let path = self.manifest_path();
791 if !path.exists() {
792 return Ok(Manifest::default());
793 }
794 let bytes = fs::read(&path)?;
795 Ok(serde_json::from_slice(&bytes)?)
796 }
797
798 fn write_manifest(&self, m: &Manifest) -> Result<(), KeyError> {
799 let json = serde_json::to_vec_pretty(m)?;
800 write_file_600(&self.manifest_path(), &json)?;
801 Ok(())
802 }
803}
804
805// --- Crypto helpers ---
806//
807// AEAD choice: AES-256-GCM via the RustCrypto `aes-gcm` 0.10 crate.
808// Reasons:
809// - Matches the original (documented but never implemented) intent of
810// the keystore, so audit reports and SECURITY.md don't need to be
811// re-anchored on a different primitive.
812// - Well-audited, widely deployed, no platform gotchas.
813// - `chacha20poly1305` would have been a defensible alternative
814// (slightly better software performance), but the migration cost of
815// changing the documented primitive while we already have to ship a
816// migration for the broken construction is not worth it.
817//
818// On-disk v2 format (`encrypt_for_disk_v2`):
819// [ magic = 0x54 ('T') ] 1 byte
820// [ version = 0x02 ] 1 byte
821// [ nonce ] 12 bytes (random per encryption)
822// [ ciphertext || tag ] N + 16 bytes (tag appended by aead crate)
823//
824// The first byte (0x54) is a structural sentinel so we can dispatch on
825// the format without relying on length heuristics. v1 ciphertexts start
826// with the first byte of their random nonce, so the chance of an
827// accidental v1 entry that looks like v2 is ~1/2^16 (matching both magic
828// AND version byte) and we still re-validate by AEAD-decrypting; if the
829// AEAD fails on something that looks like v2, we fall back to v1.
830
const KEYSTORE_MAGIC: u8 = 0x54; // 'T'
const KEYSTORE_VERSION_V2: u8 = 0x02;

/// Build the v2 keystore AEAD AAD.
///
/// Layout:
///
/// ```text
/// [ KEYSTORE_MAGIC ][ KEYSTORE_VERSION_V2 ]
/// [ len(entry_id)   as big-endian u32 ][ entry_id bytes   ]
/// [ len(public_key) as big-endian u32 ][ public_key bytes ]
/// ```
///
/// The AAD binds two things into the GCM tag beyond ciphertext+nonce:
///
/// 1. **Framing prefix** (`[KEYSTORE_MAGIC, KEYSTORE_VERSION_V2]`) so
///    flipping the magic or version byte on disk surfaces as a MAC
///    failure rather than dispatcher confusion (the M2 audit finding).
/// 2. **Entry identity** (`entry_id` and `public_key`) so an attacker
///    with write access to the keystore directory cannot copy entry A's
///    `enc_priv_key` ciphertext into entry B's JSON envelope and have it
///    decrypt cleanly (the "intra-keystore swap" class flagged in the
///    post-merge audit of TS-2026-001).
///
/// Every variable-length field is length-prefixed: concatenating such
/// fields without prefixes would let bytes be shifted across the field
/// boundary and yield identical AAD bytes for a different
/// `(entry_id, public_key)` pair.
///
/// The AAD must be byte-identical on encrypt and decrypt. Future
/// versions (V3+) get their own builder; the dispatcher picks which
/// to use based on the framing prefix.
///
/// # Panics
///
/// Panics if either field is longer than `u32::MAX` bytes. A plain
/// `as u32` cast would silently wrap the prefix in that case and
/// re-introduce exactly the boundary ambiguity the prefixes exist to
/// rule out, so the conversion is checked instead.
fn build_aad_v2(entry_id: &str, public_key: &[u8]) -> Vec<u8> {
    /// Append `field` preceded by its length as a big-endian u32.
    fn push_len_prefixed(aad: &mut Vec<u8>, field: &[u8]) {
        let len = u32::try_from(field.len())
            .expect("keystore AAD field length must fit in a u32");
        aad.extend_from_slice(&len.to_be_bytes());
        aad.extend_from_slice(field);
    }

    // 2 framing bytes + two 4-byte length prefixes + the field bytes.
    let mut aad = Vec::with_capacity(2 + 4 + entry_id.len() + 4 + public_key.len());
    aad.push(KEYSTORE_MAGIC);
    aad.push(KEYSTORE_VERSION_V2);
    push_len_prefixed(&mut aad, entry_id.as_bytes());
    push_len_prefixed(&mut aad, public_key);
    aad
}
872
873/// AES-256-GCM (the real one) encrypt for at-rest keystore storage.
874/// Returns the framed v2 blob ready to drop into `EncryptedEntry::enc_priv_key`.
875///
876/// Output: `[magic, version, nonce(12), ciphertext || tag(16)]`.
877///
878/// The AEAD's Associated Authenticated Data binds:
879/// - the framing prefix (M2 — flipping magic/version surfaces as MAC failure)
880/// - the entry id and public key (post-merge audit fix-up — closes the
881/// intra-keystore swap class where a local attacker copies entry A's
882/// `enc_priv_key` into entry B's JSON envelope).
883///
884/// See `build_aad_v2` for the exact layout. `entry_id` and `public_key`
885/// must match what gets serialized into the `EncryptedEntry` JSON;
886/// `decrypt_for_disk_v2` reads them back from the deserialized entry
887/// to recompute the AAD.
888fn encrypt_for_disk_v2(
889 key: &[u8; 32],
890 entry_id: &str,
891 public_key: &[u8],
892 plaintext: &[u8],
893) -> Result<Vec<u8>, String> {
894 // Wrap the in-memory AEAD key in Zeroizing so the local stack copy
895 // is wiped on drop. The aes-gcm cipher object owns its own internal
896 // expanded key schedule; that's outside our control, but the raw
897 // 32-byte buffer at this scope is ours to clear.
898 let key_buf: Zeroizing<[u8; 32]> = Zeroizing::new(*key);
899 let aead_key: &AesKey<Aes256Gcm> = AesKey::<Aes256Gcm>::from_slice(key_buf.as_slice());
900 let cipher = Aes256Gcm::new(aead_key);
901
902 // 96-bit random nonce from the OS CSPRNG.
903 let nonce = Aes256Gcm::generate_nonce(&mut AeadOsRng);
904
905 let aad = build_aad_v2(entry_id, public_key);
906 let ciphertext = cipher
907 .encrypt(
908 &nonce,
909 Payload {
910 msg: plaintext,
911 aad: aad.as_slice(),
912 },
913 )
914 .map_err(|e| format!("aead encrypt failed: {e}"))?;
915
916 let mut out = Vec::with_capacity(2 + 12 + ciphertext.len());
917 out.push(KEYSTORE_MAGIC);
918 out.push(KEYSTORE_VERSION_V2);
919 out.extend_from_slice(nonce.as_slice());
920 out.extend_from_slice(&ciphertext);
921 Ok(out)
922}
923
924/// AES-256-GCM decrypt of a v2 framed blob. Uses the same AAD binding
925/// as `encrypt_for_disk_v2`:
926/// - framing prefix (so a tampered magic/version surfaces as MAC failure)
927/// - entry id + public key (so swapping `enc_priv_key` between entries
928/// in the same keystore surfaces as MAC failure).
929///
930/// `entry_id` and `public_key` come from the `EncryptedEntry` JSON
931/// envelope that holds `blob`. The caller is responsible for passing the
932/// *envelope's* id and pubkey, not values from some other source — that
933/// is precisely what binds the ciphertext to its envelope.
934fn decrypt_v2(
935 key: &[u8; 32],
936 entry_id: &str,
937 public_key: &[u8],
938 blob: &[u8],
939) -> Result<Vec<u8>, String> {
940 // Minimum: magic(1) + version(1) + nonce(12) + tag(16) = 30 bytes.
941 if blob.len() < 30 {
942 return Err("v2 ciphertext too short".into());
943 }
944 if blob[0] != KEYSTORE_MAGIC || blob[1] != KEYSTORE_VERSION_V2 {
945 return Err("v2 ciphertext has wrong magic/version".into());
946 }
947 let nonce_bytes = &blob[2..14];
948 let ct = &blob[14..];
949
950 let key_buf: Zeroizing<[u8; 32]> = Zeroizing::new(*key);
951 let aead_key: &AesKey<Aes256Gcm> = AesKey::<Aes256Gcm>::from_slice(key_buf.as_slice());
952 let cipher = Aes256Gcm::new(aead_key);
953 let nonce = Nonce::from_slice(nonce_bytes);
954
955 let aad = build_aad_v2(entry_id, public_key);
956 cipher
957 .decrypt(
958 nonce,
959 Payload {
960 msg: ct,
961 aad: aad.as_slice(),
962 },
963 )
964 .map_err(|_| "MAC verification failed — key file may be corrupt or wrong machine".into())
965}
966
967/// Returns true iff `blob` is shaped like a v1 (legacy) ciphertext.
968/// Used by the dispatcher to decide whether a successful decrypt should
969/// trigger a transparent re-encrypt to v2.
970fn is_legacy_v1(blob: &[u8]) -> bool {
971 // A v2 blob always starts with [magic, version]. Anything else
972 // (including the empty enc_priv_key case during partial writes) is
973 // treated as legacy and routed through the v1 path, which will fail
974 // cleanly on garbage.
975 !(blob.len() >= 2 && blob[0] == KEYSTORE_MAGIC && blob[1] == KEYSTORE_VERSION_V2)
976}
977
978/// Top-level decrypt dispatcher used by the keystore. Tries v2 if the
979/// blob carries the magic+version prefix, otherwise falls through to the
980/// legacy v1 path. If a blob looks like v2 but AEAD verification fails,
981/// we also try v1 — this defends against the (negligible) probability
982/// that a legacy ciphertext's random first two bytes happen to collide
983/// with our magic+version.
984///
985/// M1 (TS-2026-001 audit): when the blob is v2-shaped and BOTH the v2
986/// AEAD and the v1 fallback fail, surface the v2 error rather than the
987/// v1 error. v1's failure on a v2-shaped blob is mechanical (wrong
988/// MAC computed under the wrong construction) and tells the user
989/// nothing useful; v2's failure is the actually-relevant signal
990/// (MAC verification under the documented AEAD). The previous code
991/// would mask the meaningful error with a confused legacy error
992/// message that pointed at the wrong remediation.
993fn decrypt_from_disk(
994 key: &[u8; 32],
995 entry_id: &str,
996 public_key: &[u8],
997 enc_data: &[u8],
998 legacy_nonce_field: &[u8],
999) -> Result<Zeroizing<Vec<u8>>, String> {
1000 if !is_legacy_v1(enc_data) {
1001 match decrypt_v2(key, entry_id, public_key, enc_data) {
1002 Ok(pt) => return Ok(Zeroizing::new(pt)),
1003 Err(v2_err) => {
1004 // Collision fallback. v1 entries had random first bytes;
1005 // there's a vanishing chance one looks like v2 framing.
1006 // Try v1 first; if it succeeds we have a legitimate
1007 // legacy entry whose framing happens to look v2-shaped.
1008 // If v1 also fails, surface the v2 error (the
1009 // semantically meaningful one) rather than v1's
1010 // mechanical-junk failure.
1011 return match decrypt_legacy_v1(key, enc_data, legacy_nonce_field) {
1012 Ok(pt) => Ok(Zeroizing::new(pt)),
1013 Err(_) => Err(v2_err),
1014 };
1015 }
1016 }
1017 }
1018 decrypt_legacy_v1(key, enc_data, legacy_nonce_field).map(Zeroizing::new)
1019}
1020
1021/// DEPRECATED: legacy at-rest decryption for keystores written before
1022/// v0.10.3. This is the SHA-256-CTR + HMAC-SHA-256 construction that
1023/// was mis-labelled as AES-256-GCM (TS-2026-001). The CTR keystream is
1024/// also degenerate (the same `enc_key` byte is reused once per
1025/// plaintext byte, since `block[i % 32]` indexes the same SHA-256 output
1026/// modulo 32), so the construction is NOT a real stream cipher even
1027/// ignoring the AEAD mislabelling.
1028///
1029/// Kept ONLY to migrate existing on-disk keystores forward to the v2
1030/// AEAD format. Never call this for new writes. The encrypt counterpart
1031/// has been removed from the v2 codepath — the only place v1
1032/// ciphertexts come from is files written by older Treeship versions.
1033pub fn aes_gcm_decrypt(
1034 key: &[u8; 32],
1035 enc_data: &[u8],
1036 _nonce_unused: &[u8],
1037) -> Result<Vec<u8>, String> {
1038 // Preserved as a public symbol because the `treeship-vi` sibling
1039 // crate calls it directly. vi only ever produces v1 ciphertexts
1040 // (its `aes_gcm_encrypt` shim calls `legacy_v1_encrypt`) and has
1041 // no concept of the `EncryptedEntry` envelope that carries the
1042 // entry id + public key the v2 AAD now requires. Route this shim
1043 // directly through the legacy v1 path so vi's call site keeps
1044 // working byte-for-byte; vi's eventual migration release will
1045 // adopt its own AEAD path with its own envelope binding.
1046 decrypt_legacy_v1(key, enc_data, _nonce_unused)
1047}
1048
1049/// DEPRECATED: legacy at-rest encryption. Same caveats as
1050/// `aes_gcm_decrypt`. Kept ONLY as a public symbol for compatibility
1051/// with the `treeship-vi` sibling crate; the core keystore no longer
1052/// produces v1 ciphertexts.
1053///
1054/// New code MUST use `encrypt_for_disk_v2`. This function still
1055/// produces v1-format output so the vi crate's on-disk format remains
1056/// byte-stable until it migrates on its own cadence.
1057pub fn aes_gcm_encrypt(key: &[u8; 32], plaintext: &[u8]) -> Result<(Vec<u8>, Vec<u8>), String> {
1058 legacy_v1_encrypt(key, plaintext)
1059}
1060
1061/// Legacy v1 encrypt. SHA-256-CTR + HMAC-SHA-256. DO NOT USE for new
1062/// writes — present only so vi-keystore callers keep working until
1063/// they migrate. See `aes_gcm_encrypt` doc-comment for the security
1064/// caveats.
1065fn legacy_v1_encrypt(key: &[u8; 32], plaintext: &[u8]) -> Result<(Vec<u8>, Vec<u8>), String> {
1066 use sha2::Sha256;
1067
1068 let mut nonce = [0u8; 12];
1069 // v0.10.4 P1 audit: nonce reuse breaks AEAD. Read directly from the OS
1070 // CSPRNG via OsRng rather than the userland thread_rng, which can mis-seed
1071 // across forks / on some WASM targets. Legacy v1 write path is kept for
1072 // treeship-vi byte-stability but still needs sound nonces.
1073 OsRng.fill_bytes(&mut nonce);
1074
1075 let mut enc_key_input = key.to_vec();
1076 enc_key_input.extend_from_slice(&nonce);
1077 enc_key_input.extend_from_slice(b"enc");
1078 let enc_key = Sha256::digest(&enc_key_input);
1079
1080 let mut mac_key_input = key.to_vec();
1081 mac_key_input.extend_from_slice(&nonce);
1082 mac_key_input.extend_from_slice(b"mac");
1083 let mac_key = Sha256::digest(&mac_key_input);
1084
1085 let ciphertext: Vec<u8> = plaintext.iter().enumerate().map(|(i, &b)| {
1086 let mut block_input = enc_key.to_vec();
1087 block_input.extend_from_slice(&(i as u64).to_le_bytes());
1088 let block = Sha256::digest(&block_input);
1089 b ^ block[i % 32]
1090 }).collect();
1091
1092 let mut mac_input = mac_key.to_vec();
1093 mac_input.extend_from_slice(&nonce);
1094 mac_input.extend_from_slice(&ciphertext);
1095 let mac = Sha256::digest(&mac_input);
1096
1097 let mut out = Vec::with_capacity(12 + 32 + ciphertext.len());
1098 out.extend_from_slice(&nonce);
1099 out.extend_from_slice(&mac);
1100 out.extend_from_slice(&ciphertext);
1101
1102 Ok((out, nonce.to_vec()))
1103}
1104
1105/// Legacy v1 decrypt. SHA-256-CTR + HMAC-SHA-256. See the module-level
1106/// notes on TS-2026-001 for why this is broken; kept only to migrate
1107/// existing keystores forward.
1108fn decrypt_legacy_v1(
1109 key: &[u8; 32],
1110 enc_data: &[u8],
1111 _nonce_unused: &[u8],
1112) -> Result<Vec<u8>, String> {
1113 if enc_data.len() < 44 {
1114 return Err("ciphertext too short".into());
1115 }
1116 use sha2::Sha256;
1117
1118 let nonce = &enc_data[..12];
1119 let stored_mac = &enc_data[12..44];
1120 let ciphertext = &enc_data[44..];
1121
1122 let nonce_arr: [u8; 12] = nonce.try_into().unwrap();
1123
1124 let mut enc_key_input = key.to_vec();
1125 enc_key_input.extend_from_slice(&nonce_arr);
1126 enc_key_input.extend_from_slice(b"enc");
1127 let enc_key = Sha256::digest(&enc_key_input);
1128
1129 let mut mac_key_input = key.to_vec();
1130 mac_key_input.extend_from_slice(&nonce_arr);
1131 mac_key_input.extend_from_slice(b"mac");
1132 let mac_key = Sha256::digest(&mac_key_input);
1133
1134 let mut mac_input = mac_key.to_vec();
1135 mac_input.extend_from_slice(&nonce_arr);
1136 mac_input.extend_from_slice(ciphertext);
1137 let computed_mac = Sha256::digest(&mac_input);
1138
1139 let mac_ok = stored_mac.iter().zip(computed_mac.iter())
1140 .fold(0u8, |acc, (a, b)| acc | (a ^ b)) == 0;
1141
1142 if !mac_ok {
1143 return Err("MAC verification failed — key file may be corrupt or wrong machine".into());
1144 }
1145
1146 let plaintext: Vec<u8> = ciphertext.iter().enumerate().map(|(i, &b)| {
1147 let mut block_input = enc_key.to_vec();
1148 block_input.extend_from_slice(&(i as u64).to_le_bytes());
1149 let block = Sha256::digest(&block_input);
1150 b ^ block[i % 32]
1151 }).collect();
1152
1153 Ok(plaintext)
1154}
1155
1156// --- Machine key derivation ---
1157
1158pub fn derive_machine_key(store_dir: &Path) -> Result<[u8; 32], KeyError> {
1159 // 1. Linux: /etc/machine-id (stable across reboots)
1160 if let Ok(id) = fs::read_to_string("/etc/machine-id") {
1161 let trimmed = id.trim();
1162 if !trimmed.is_empty() {
1163 let mut h = Sha256::new();
1164 h.update(trimmed.as_bytes());
1165 h.update(store_dir.to_string_lossy().as_bytes());
1166 return Ok(h.finalize().into());
1167 }
1168 }
1169
1170 // 2. macOS: hostname + username derivation (v1, backward compatible).
1171 //
1172 // TODO(v0.7.0): Migrate to IOPlatformSerialNumber-based derivation.
1173 // The serial number is more stable (survives hostname and username
1174 // changes), but switching now would silently invalidate all existing
1175 // keys on macOS. A proper migration needs to:
1176 // 1. Try the new derivation first.
1177 // 2. On decryption failure, fall back to hostname+username.
1178 // 3. If legacy succeeds, re-encrypt with the new key and save.
1179 // Until that migration tooling is in place, keep hostname+username
1180 // as the primary derivation so existing users are not locked out.
1181 #[cfg(target_os = "macos")]
1182 {
1183 let hostname = std::process::Command::new("hostname")
1184 .output()
1185 .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
1186 .unwrap_or_default();
1187 let username = std::env::var("USER").unwrap_or_default();
1188 if !hostname.is_empty() && !username.is_empty() {
1189 let mut h = Sha256::new();
1190 h.update(b"treeship-machine-key:");
1191 h.update(hostname.as_bytes());
1192 h.update(b":");
1193 h.update(username.as_bytes());
1194 h.update(b":");
1195 h.update(store_dir.to_string_lossy().as_bytes());
1196 return Ok(h.finalize().into());
1197 }
1198 }
1199
1200 // 3. Fallback: random seed file. Co-located with the keystore so a
1201 // project-local keystore (/proj/.treeship/keys/) keeps its seed at
1202 // /proj/.treeship/machine_seed -- never reaching for ~/.treeship.
1203 // A global keystore (~/.treeship/keys/) co-locates to
1204 // ~/.treeship/machine_seed, which is byte-identical to the
1205 // pre-v0.9.6 location, so existing global keystores keep working.
1206 //
1207 // Backward-compat read order:
1208 // 1. <store_dir>/../machine_seed (the new co-located path)
1209 // 2. ~/.treeship/machine_seed (the old hardcoded path)
1210 // Write order on first creation:
1211 // 1. <store_dir>/../machine_seed if the parent exists/is writable
1212 // 2. ~/.treeship/machine_seed as a last resort
1213 //
1214 // This makes project-local config truly self-contained: an
1215 // isolated /proj keystore can decrypt its own keys even when
1216 // the user's ~/.treeship is corrupt or on a different machine,
1217 // closing the trust-fabric isolation gap that blocked
1218 // project-local smoke tests.
1219 let local_seed_path = store_dir.parent().map(|p| p.join("machine_seed"));
1220 let home = std::env::var("HOME")
1221 .map(std::path::PathBuf::from)
1222 .map_err(|_| KeyError::Crypto("HOME not set".to_string()))?;
1223 let global_seed_path = home.join(".treeship").join("machine_seed");
1224
1225 let seed = if let Some(local) = local_seed_path.as_ref().filter(|p| p.exists()) {
1226 fs::read_to_string(local).map_err(KeyError::Io)?
1227 } else if global_seed_path.exists() {
1228 // Backward-compat: an existing global seed keeps decrypting any
1229 // keystore that was encrypted under it (in particular the
1230 // standard ~/.treeship/keys/ case where local == global).
1231 fs::read_to_string(&global_seed_path).map_err(KeyError::Io)?
1232 } else {
1233 let mut bytes = [0u8; 32];
1234 // v0.10.4 P1 audit: this seed becomes the machine-key fallback used to
1235 // wrap on-disk private keys. Source straight from the OS entropy pool.
1236 OsRng.fill_bytes(&mut bytes);
1237 let seed_hex = hex_encode(&bytes);
1238
1239 // Prefer creating the seed locally. Falls back to the global
1240 // path only when the keystore has no usable parent (rare;
1241 // happens when store_dir is "/" or similar pathological input).
1242 let target = match local_seed_path.as_ref() {
1243 Some(p) => {
1244 let _ = fs::create_dir_all(p.parent().unwrap_or(Path::new(".")));
1245 p.clone()
1246 }
1247 None => {
1248 let _ = fs::create_dir_all(global_seed_path.parent().unwrap_or(Path::new(".")));
1249 global_seed_path.clone()
1250 }
1251 };
1252 fs::write(&target, &seed_hex).map_err(KeyError::Io)?;
1253 #[cfg(unix)]
1254 {
1255 use std::os::unix::fs::PermissionsExt;
1256 let _ = fs::set_permissions(&target, fs::Permissions::from_mode(0o600));
1257 }
1258 seed_hex
1259 };
1260
1261 let mut h = Sha256::new();
1262 h.update(b"treeship-machine-key-fallback:");
1263 h.update(seed.trim().as_bytes());
1264 h.update(b":");
1265 h.update(store_dir.to_string_lossy().as_bytes());
1266 Ok(h.finalize().into())
1267}
1268
1269/// Stable machine key derivation for NEW keys (VI P-256, etc).
1270/// Uses hardware identifiers that survive hostname/user changes.
1271/// For legacy ship Ed25519 keys, use `derive_machine_key()` instead.
1272pub fn derive_machine_key_stable(store_dir: &Path) -> Result<[u8; 32], KeyError> {
1273 // 1. Linux: /etc/machine-id
1274 if let Ok(id) = fs::read_to_string("/etc/machine-id") {
1275 let trimmed = id.trim();
1276 if !trimmed.is_empty() {
1277 let mut h = Sha256::new();
1278 h.update(b"treeship-machine-key-v2:");
1279 h.update(trimmed.as_bytes());
1280 h.update(b":");
1281 h.update(store_dir.to_string_lossy().as_bytes());
1282 return Ok(h.finalize().into());
1283 }
1284 }
1285
1286 // 2. macOS: IOPlatformSerialNumber (hardware serial, stable across
1287 // hostname changes, user renames, non-interactive shells)
1288 #[cfg(target_os = "macos")]
1289 {
1290 if let Ok(output) = std::process::Command::new("ioreg")
1291 .args(["-rd1", "-c", "IOPlatformExpertDevice"])
1292 .output()
1293 {
1294 let stdout = String::from_utf8_lossy(&output.stdout);
1295 for line in stdout.lines() {
1296 if line.contains("IOPlatformSerialNumber") {
1297 if let Some(serial) = line.split('"').nth(3) {
1298 if !serial.is_empty() {
1299 let mut h = Sha256::new();
1300 h.update(b"treeship-machine-key-v2:");
1301 h.update(serial.as_bytes());
1302 h.update(b":");
1303 h.update(store_dir.to_string_lossy().as_bytes());
1304 return Ok(h.finalize().into());
1305 }
1306 }
1307 }
1308 }
1309 }
1310 }
1311
1312 // 3. Fallback: persistent random seed in ~/.treeship/.internal/
1313 // Separate from key material. Mode 0600.
1314 let home = std::env::var("HOME")
1315 .map(std::path::PathBuf::from)
1316 .map_err(|_| KeyError::Crypto("HOME not set".to_string()))?;
1317 let seed_dir = home.join(".treeship").join(".internal");
1318 let _ = fs::create_dir_all(&seed_dir);
1319 #[cfg(unix)]
1320 {
1321 use std::os::unix::fs::PermissionsExt;
1322 let _ = fs::set_permissions(&seed_dir, fs::Permissions::from_mode(0o700));
1323 }
1324
1325 let seed_path = seed_dir.join("machine_seed_v2");
1326 let seed = if seed_path.exists() {
1327 fs::read_to_string(&seed_path).map_err(KeyError::Io)?
1328 } else {
1329 let mut bytes = [0u8; 32];
1330 // v0.10.4 P1 audit: machine_seed_v2 backs the v2 machine-key
1331 // fallback. Same OsRng rationale as the v1 seed above.
1332 OsRng.fill_bytes(&mut bytes);
1333 let seed_hex = hex_encode(&bytes);
1334 fs::write(&seed_path, &seed_hex).map_err(KeyError::Io)?;
1335 #[cfg(unix)]
1336 {
1337 use std::os::unix::fs::PermissionsExt;
1338 let _ = fs::set_permissions(&seed_path, fs::Permissions::from_mode(0o600));
1339 }
1340 seed_hex
1341 };
1342
1343 let mut h = Sha256::new();
1344 h.update(b"treeship-machine-key-v2-fallback:");
1345 h.update(seed.trim().as_bytes());
1346 h.update(b":");
1347 h.update(store_dir.to_string_lossy().as_bytes());
1348 Ok(h.finalize().into())
1349}
1350
1351// --- Utility ---
1352
1353fn new_key_id() -> KeyId {
1354 let mut b = [0u8; 8];
1355 // v0.10.4 P1 audit: key_id is mixed into AAD by encrypt_for_disk_v2, so
1356 // collisions or low-entropy ids would weaken the AAD binding. Use OsRng
1357 // directly so the id is OS-CSPRNG-quality even under fork or odd targets.
1358 OsRng.fill_bytes(&mut b);
1359 format!("key_{}", hex_encode(&b))
1360}
1361
1362fn fingerprint(pub_key: &[u8]) -> String {
1363 let h = Sha256::digest(pub_key);
1364 hex_encode(&h[..8])
1365}
1366
/// Encode `b` as lowercase hexadecimal, two digits per byte, with no
/// prefix and no separators. An empty slice yields an empty string.
///
/// The output buffer is sized up front and filled from a nibble lookup
/// table, so no temporary `String` is allocated per byte.
fn hex_encode(b: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut encoded = String::with_capacity(b.len() * 2);
    for &byte in b {
        // High nibble first, then low nibble.
        encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    encoded
}
1373
1374/// Verify a private-key file has restrictive permissions before loading
1375/// it for signing. Returns `Ok(())` on non-Unix platforms, when the
1376/// `TREESHIP_ALLOW_INSECURE_KEY_PERMS=1` escape hatch is set, or when
1377/// the file is not group/world accessible. Otherwise returns
1378/// `KeyError::InsecureKeyPerms` with the offending path and mode.
1379///
1380/// **TOCTOU caveat:** this path-based check has an unavoidable race
1381/// window between the `stat` and any subsequent `open` of the same
1382/// path. New signing-path callers MUST use
1383/// `check_open_key_file_perms` (fstat on an already-open fd) instead;
1384/// this function is retained only for non-signing callers that
1385/// already accept the race (e.g. `treeship doctor` scanning the
1386/// keystore directory).
1387#[allow(dead_code)]
1388fn check_key_file_perms(path: &Path) -> Result<(), KeyError> {
1389 #[cfg(unix)]
1390 {
1391 use std::os::unix::fs::PermissionsExt;
1392 if std::env::var_os("TREESHIP_ALLOW_INSECURE_KEY_PERMS")
1393 .map(|v| v == "1")
1394 .unwrap_or(false)
1395 {
1396 return Ok(());
1397 }
1398 // Missing files are reported by the caller as NotFound -- don't
1399 // mask that with a perm error.
1400 let meta = match fs::metadata(path) {
1401 Ok(m) => m,
1402 Err(_) => return Ok(()),
1403 };
1404 let mode = meta.permissions().mode();
1405 if mode & 0o077 != 0 {
1406 return Err(KeyError::InsecureKeyPerms {
1407 path: path.to_path_buf(),
1408 mode,
1409 });
1410 }
1411 }
1412 let _ = path;
1413 Ok(())
1414}
1415
1416/// Race-free perm gate: runs `fstat` on an already-open `File` and
1417/// rejects if the mode has any group or world bits. Use this from the
1418/// signing path: open the key file once, hand the resulting `File` to
1419/// this function, then read from the SAME `File` -- the inode is
1420/// pinned by the open fd, so a path-level swap between perm-check and
1421/// read cannot influence what we end up decrypting.
1422///
1423/// `path` is carried only for error reporting; it is never re-opened.
1424/// The `TREESHIP_ALLOW_INSECURE_KEY_PERMS=1` bypass is honored
1425/// identically to `check_key_file_perms` so existing CI workflows keep
1426/// working.
1427#[allow(unused_variables)]
1428fn check_open_key_file_perms(path: &Path, file: &fs::File) -> Result<(), KeyError> {
1429 #[cfg(unix)]
1430 {
1431 use std::os::unix::fs::PermissionsExt;
1432 if std::env::var_os("TREESHIP_ALLOW_INSECURE_KEY_PERMS")
1433 .map(|v| v == "1")
1434 .unwrap_or(false)
1435 {
1436 return Ok(());
1437 }
1438 // `File::metadata` on Unix calls `fstat(fd)` -- it does NOT
1439 // re-resolve the path, so the result describes the same inode
1440 // we will read from. This is the structural property that
1441 // makes the gate race-free.
1442 let meta = file.metadata()?;
1443 let mode = meta.permissions().mode();
1444 if mode & 0o077 != 0 {
1445 return Err(KeyError::InsecureKeyPerms {
1446 path: path.to_path_buf(),
1447 mode,
1448 });
1449 }
1450 }
1451 Ok(())
1452}
1453
1454impl Store {
1455 /// Repair file permissions on the keystore directory and every file
1456 /// inside it: dir to 0700, key entry files and manifest to 0600.
1457 /// Used by `treeship doctor --fix`. No-op on non-Unix.
1458 ///
1459 /// Returns the list of (path, old_mode, new_mode) tuples for paths
1460 /// that were actually changed, so the caller can report what it did.
1461 pub fn fix_perms(&self) -> Result<Vec<(PathBuf, u32, u32)>, KeyError> {
1462 let mut changed: Vec<(PathBuf, u32, u32)> = Vec::new();
1463 #[cfg(unix)]
1464 {
1465 use std::os::unix::fs::PermissionsExt;
1466
1467 let dir_meta = fs::metadata(&self.dir)?;
1468 let dir_mode = dir_meta.permissions().mode() & 0o777;
1469 if dir_mode != 0o700 {
1470 fs::set_permissions(&self.dir, fs::Permissions::from_mode(0o700))?;
1471 changed.push((self.dir.clone(), dir_mode, 0o700));
1472 }
1473
1474 for entry in fs::read_dir(&self.dir)? {
1475 let entry = entry?;
1476 let path = entry.path();
1477 if !entry.file_type()?.is_file() {
1478 continue;
1479 }
1480 let mode = entry.metadata()?.permissions().mode() & 0o777;
1481 if mode != 0o600 {
1482 fs::set_permissions(&path, fs::Permissions::from_mode(0o600))?;
1483 changed.push((path, mode, 0o600));
1484 }
1485 }
1486 }
1487 Ok(changed)
1488 }
1489}
1490
/// Open the per-entry migration sentinel lock file, creating it if it
/// does not exist. The file is opened read+write and is never truncated.
///
/// The returned handle can be passed to `fs2::FileExt::lock_exclusive`
/// to serialize concurrent v1->v2 migrations of the same entry across
/// processes/threads (TS-2026-001 H3).
///
/// On Unix the creation mode is 0o600, requested as part of the open
/// itself via `OpenOptionsExt::mode`, so a newly created sentinel never
/// has looser permissions. On other platforms no mode is requested and
/// the file inherits the parent's ACLs (the keystore dir is owner-scoped
/// already).
fn open_migration_lock_file(path: &Path) -> Result<fs::File, io::Error> {
    let mut options = fs::OpenOptions::new();
    options.create(true).read(true).write(true).truncate(false);

    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        options.mode(0o600);
    }

    options.open(path)
}
1521
1522/// Atomically write `data` to `path` with owner-only (0o600) permissions on
1523/// Unix.
1524///
1525/// TS-2026-001 H1 + H2: the prior implementation was truncate-then-write,
1526/// which destroys the original file if the process crashes mid-write. For
1527/// the keystore that's catastrophic -- a crash during transparent v1->v2
1528/// migration would leave a zero-byte (or partial) key entry on disk and
1529/// the private key would be unrecoverable. This implementation writes to
1530/// a sibling tmp file in the same directory, fsyncs the bytes through to
1531/// the platter, then performs a POSIX-atomic same-filesystem `rename(2)`.
1532/// A crash before the rename leaves the original file intact; the tmp
1533/// file is harmless garbage that the next successful write will overwrite.
1534///
1535/// The 0o600 mode is set at file *creation* via `OpenOptionsExt::mode`
1536/// so there is no window in which the file exists with looser perms.
1537/// The prior `set_permissions` post-write call is dropped because it was
1538/// redundant and gave the appearance (but not the substance) of safety.
1539fn write_file_600(path: &Path, data: &[u8]) -> Result<(), KeyError> {
1540 // Place the tmp file in the same directory as the final path so the
1541 // rename stays on the same filesystem (cross-FS renames are not atomic
1542 // and degrade to copy+unlink, defeating the whole point).
1543 let tmp_path = path.with_extension("tmp");
1544
1545 // Best-effort cleanup of any stale tmp from a prior crash before we
1546 // start writing. Ignored on error -- if it doesn't exist that's fine,
1547 // and if it can't be removed the OpenOptions call below will surface
1548 // the underlying error.
1549 let _ = fs::remove_file(&tmp_path);
1550
1551 let write_result: Result<(), KeyError> = (|| {
1552 #[cfg(unix)]
1553 let open = {
1554 use std::os::unix::fs::OpenOptionsExt;
1555 fs::OpenOptions::new()
1556 .write(true)
1557 .create(true)
1558 .truncate(true)
1559 .mode(0o600)
1560 .open(&tmp_path)
1561 };
1562 #[cfg(not(unix))]
1563 let open = fs::OpenOptions::new()
1564 .write(true)
1565 .create(true)
1566 .truncate(true)
1567 .open(&tmp_path);
1568
1569 let mut f = open?;
1570 f.write_all(data)?;
1571 // sync_all flushes both data AND metadata, so on a crash after
1572 // the rename, fsck/journal recovery sees the new bytes -- not a
1573 // ghost inode with stale content.
1574 f.sync_all()?;
1575 Ok(())
1576 })();
1577
1578 if let Err(e) = write_result {
1579 // Best-effort cleanup so the next write isn't surprised by a
1580 // half-written tmp. Errors here are not surfaced: the original
1581 // write error is what the caller needs to see.
1582 let _ = fs::remove_file(&tmp_path);
1583 return Err(e);
1584 }
1585
1586 // Atomic same-filesystem rename. On Unix this is a single
1587 // rename(2) syscall guaranteed by POSIX to be atomic with respect
1588 // to other observers. On Windows std::fs::rename is implemented
1589 // via MoveFileEx with MOVEFILE_REPLACE_EXISTING (atomic on NTFS,
1590 // best-effort elsewhere). After this returns Ok, the new bytes are
1591 // visible at `path` and the tmp file no longer exists.
1592 if let Err(e) = fs::rename(&tmp_path, path) {
1593 let _ = fs::remove_file(&tmp_path);
1594 return Err(KeyError::Io(e));
1595 }
1596
1597 // fsync the parent directory so the rename's directory-entry update
1598 // is itself persisted. The previous code only fsynced the tmp
1599 // file's contents (via sync_all on the file handle) -- on ext4/xfs
1600 // with default mount options, the rename can return to userspace
1601 // before the dirent metadata has been written to the journal. A
1602 // power loss in that window leaves the directory entry pointing at
1603 // the OLD inode (or, worse, missing entirely if both old and new
1604 // were unlinked from the parent), even though both the data bytes
1605 // and the rename syscall ostensibly completed. The H1 doc-comment
1606 // above promised stronger durability than the code delivered;
1607 // fsyncing the parent dir closes that gap.
1608 //
1609 // Best-effort on Unix: a directory open + sync_all is the standard
1610 // pattern (see e.g. SQLite's atomic-commit, leveldb, lmdb). On
1611 // platforms where opening a directory for sync isn't supported, we
1612 // silently skip -- the rename is still atomic-with-respect-to-
1613 // observers, we just don't guarantee crash-durability of the
1614 // dirent update.
1615 #[cfg(unix)]
1616 {
1617 if let Some(parent) = path.parent() {
1618 // Errors here are non-fatal: the rename succeeded and the
1619 // common case (no power loss before the next fs flush) is
1620 // correct. We surface a failure to open/sync the dir only
1621 // if the rename itself succeeded, since otherwise the
1622 // caller would mistake a durability hint for a write
1623 // failure. swallow silently rather than return.
1624 if let Ok(dir) = fs::File::open(parent) {
1625 let _ = dir.sync_all();
1626 }
1627 }
1628 }
1629
1630 Ok(())
1631}
1632
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, `duration_since`
/// fails; that case collapses to a zero duration, so the function returns
/// `0` instead of panicking.
fn unix_now() -> u64 {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    since_epoch.as_secs()
}
1640
1641#[cfg(test)]
1642mod tests {
1643 use super::*;
1644
1645 fn temp_dir_path() -> PathBuf {
1646 let mut p = std::env::temp_dir();
1647 p.push(format!("treeship-test-{}", {
1648 let mut b = [0u8; 4];
1649 // v0.10.4 P1 audit: thread_rng acceptable here. This is a
1650 // test-only temp-dir suffix to avoid collisions between parallel
1651 // test runs. Not a cryptographic input; entropy quality irrelevant.
1652 rand::thread_rng().fill_bytes(&mut b);
1653 hex_encode(&b)
1654 }));
1655 p
1656 }
1657
1658 fn make_store() -> (Store, PathBuf) {
1659 let dir = temp_dir_path();
1660 let store = Store::open(&dir).unwrap();
1661 (store, dir)
1662 }
1663
/// Recursively deletes a test directory. Failures are ignored on purpose:
/// a leftover temp directory must not fail the test that produced it.
fn cleanup(dir: PathBuf) {
    let _ignored = fs::remove_dir_all(dir);
}
1667
/// A freshly generated key reports the expected id prefix, algorithm,
/// a non-empty fingerprint and a 32-byte public key.
#[test]
fn generate_key() {
    let (store, dir) = make_store();
    let minted = store.generate(true).unwrap();

    assert!(minted.id.starts_with("key_"));
    assert_eq!(minted.algorithm, "ed25519");
    assert!(!minted.fingerprint.is_empty());
    assert_eq!(minted.public_key.len(), 32);

    cleanup(dir);
}
1678
/// After generating a default key, `default_signer` yields a signer with
/// a non-empty key id that produces 64-byte signatures.
#[test]
fn default_signer_works() {
    let (store, dir) = make_store();
    store.generate(true).unwrap();

    let signer = store.default_signer().unwrap();
    assert!(!signer.key_id().is_empty());

    let message = crate::attestation::pae("text/plain", b"test");
    let signature = signer.sign(&message).unwrap();
    assert_eq!(signature.len(), 64);

    cleanup(dir);
}
1690
/// Round-trips a payload through the legacy public `aes_gcm_encrypt` /
/// `aes_gcm_decrypt` pair; what comes out must equal what went in.
#[test]
fn encrypt_decrypt_roundtrip() {
    let secret = b"super secret private key material here!";
    let key = [42u8; 32];

    let (ciphertext, nonce) = aes_gcm_encrypt(&key, secret).unwrap();
    let recovered = aes_gcm_decrypt(&key, &ciphertext, &nonce).unwrap();

    assert_eq!(recovered, secret);
}
1701
/// Decrypting with a key other than the one used to encrypt must error.
#[test]
fn decrypt_wrong_key_fails() {
    let right_key = [42u8; 32];
    let wrong_key = [99u8; 32];

    let (ciphertext, nonce) = aes_gcm_encrypt(&right_key, b"secret").unwrap();
    let outcome = aes_gcm_decrypt(&wrong_key, &ciphertext, &nonce);

    assert!(outcome.is_err());
}
1709
1710 // --- v2 AEAD tests (TS-2026-001 fix) -----------------------------------
1711
// Fixed entry id + public key shared by the unit-level v2 tests below.
// The AAD builder binds these into the GCM tag, so the encrypt and the
// decrypt side of every test must be given identical values. Constants
// keep each test focused on its own bit-flip / tamper assertion without
// dragging Store setup into the picture.
const TEST_ENTRY_ID: &str = "key_unit_test_entry_0001";
const TEST_PUBLIC_KEY: &[u8; 32] = &[0xAA; 32];
1719
/// v2 round trip: checks the on-disk framing (magic, version, total
/// length) and that the dispatcher recovers the original plaintext.
#[test]
fn v2_encrypt_decrypt_roundtrip() {
    let key = [7u8; 32];
    let plaintext = b"super secret private key material here!";

    let blob =
        encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, plaintext).unwrap();

    // Framing: 1 magic byte, 1 version byte, 12-byte nonce, then the
    // ciphertext (same length as the plaintext) and a 16-byte tag.
    assert_eq!(blob[0], KEYSTORE_MAGIC, "magic byte");
    assert_eq!(blob[1], KEYSTORE_VERSION_V2, "version byte");
    assert_eq!(
        blob.len(),
        2 + 12 + plaintext.len() + 16,
        "magic+version+nonce+ct+tag length"
    );

    // The legacy nonce argument is unused for v2 blobs, hence empty.
    let recovered =
        decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]).unwrap();
    assert_eq!(&*recovered, plaintext);
}
1736
/// A v2 blob must not decrypt under a different key. Whatever fallback
/// the dispatcher attempts, the end result has to be an error rather
/// than some wrong plaintext.
#[test]
fn v2_decrypt_wrong_key_fails() {
    let right_key = [7u8; 32];
    let wrong_key = [99u8; 32];

    let blob =
        encrypt_for_disk_v2(&right_key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"secret").unwrap();
    let outcome = decrypt_from_disk(&wrong_key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]);

    assert!(outcome.is_err(), "wrong key must fail");
}
1748
/// Flipping a bit in the ciphertext *body* of a v2 blob must make
/// decryption fail.
///
/// The blob layout is magic(1) + version(1) + nonce(12) + ciphertext +
/// tag(16). The index flipped here is the last ciphertext byte, i.e. the
/// byte just before the trailing tag. (Flipping `len - 5` would land
/// inside the 16-byte tag and merely repeat `v2_tamper_tag_fails`,
/// leaving ciphertext tampering untested.)
#[test]
fn v2_tamper_ciphertext_fails() {
    const FRAMING_LEN: usize = 2 + 12;
    const TAG_LEN: usize = 16;

    let key = [7u8; 32];
    let plaintext = b"super secret private key";
    let mut blob =
        encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, plaintext).unwrap();

    // Guard the layout assumption so the index below provably points at
    // ciphertext and not at framing or tag bytes.
    assert_eq!(
        blob.len(),
        FRAMING_LEN + plaintext.len() + TAG_LEN,
        "unexpected v2 blob layout"
    );
    let last_ct_byte = blob.len() - TAG_LEN - 1;
    assert!(last_ct_byte >= FRAMING_LEN, "index must be inside the ciphertext body");

    // GCM authenticates the ciphertext; any single-bit flip must fail.
    blob[last_ct_byte] ^= 0x01;
    let result = decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]);
    assert!(result.is_err(), "tampered ciphertext must fail to decrypt");
}
1763
/// Flipping a bit inside the nonce region of a v2 blob must make
/// decryption fail.
#[test]
fn v2_tamper_nonce_fails() {
    let key = [7u8; 32];
    let mut blob = encrypt_for_disk_v2(
        &key,
        TEST_ENTRY_ID,
        TEST_PUBLIC_KEY,
        b"super secret private key",
    )
    .unwrap();

    // Index 5 lies inside the nonce, which occupies bytes [2..14].
    blob[5] ^= 0x01;

    let outcome = decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]);
    assert!(outcome.is_err(), "tampered nonce must fail to decrypt");
}
1775
/// Flipping a bit in the trailing GCM tag of a v2 blob must make
/// decryption fail.
#[test]
fn v2_tamper_tag_fails() {
    let key = [7u8; 32];
    let mut blob = encrypt_for_disk_v2(
        &key,
        TEST_ENTRY_ID,
        TEST_PUBLIC_KEY,
        b"super secret private key",
    )
    .unwrap();

    // The tag is the last 16 bytes; flip the top bit of the final byte.
    let final_byte = blob.len() - 1;
    blob[final_byte] ^= 0x80;

    let outcome = decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]);
    assert!(outcome.is_err(), "tampered GCM tag must fail to decrypt");
}
1788
/// Nonce freshness: encrypting the same plaintext under the same key
/// must never reuse a nonce, since nonce reuse breaks AES-GCM.
#[test]
fn v2_nonces_are_unique_across_writes() {
    const N: usize = 10_000;
    let key = [7u8; 32];

    // Two-sample check: identical inputs, different blobs and nonces.
    let first =
        encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"identical").unwrap();
    let second =
        encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"identical").unwrap();
    assert_ne!(first, second,
        "two v2 encryptions of the same plaintext must differ");
    assert_ne!(&first[2..14], &second[2..14], "nonces must differ");

    // Bulk check: collect the 12-byte nonce (bytes [2..14]) of N blobs
    // into a set. With 96-bit random nonces the chance of a legitimate
    // collision among 10k draws is on the order of 2^-70, so any
    // duplicate points at an RNG defect (mis-seeding, an accidentally
    // deterministic nonce, ...) that the two-sample check cannot catch.
    let nonces: std::collections::HashSet<Vec<u8>> = (0..N)
        .map(|_| {
            let blob =
                encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"x").unwrap();
            blob[2..14].to_vec()
        })
        .collect();

    assert_eq!(
        nonces.len(),
        N,
        "all {} v2 nonces must be unique; collision => RNG defect",
        N
    );
}
1826
/// Overwriting the version byte of a v2 blob must cause `decrypt_v2` to
/// reject it.
#[test]
fn v2_tamper_version_byte_fails() {
    let key = [7u8; 32];
    let mut blob = encrypt_for_disk_v2(
        &key,
        TEST_ENTRY_ID,
        TEST_PUBLIC_KEY,
        b"super secret private key",
    )
    .unwrap();

    // Sanity: the blob starts out tagged as v2 before we corrupt it.
    assert_eq!(blob[1], KEYSTORE_VERSION_V2);
    blob[1] = 0xff;

    let outcome = decrypt_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob);
    assert!(
        outcome.is_err(),
        "altered version byte must be rejected"
    );
}
1844
/// AAD binding: a blob that carries valid v2 framing but whose
/// ciphertext was sealed with *no* associated data must be rejected by
/// `decrypt_v2` instead of yielding the plaintext.
#[test]
fn v2_aad_binding_detects_framing_substitution() {
    use aes_gcm::aead::Aead;

    let key = [7u8; 32];
    let plaintext = b"M2 AAD bound material";

    // Seal the payload directly with AES-256-GCM and empty AAD. This
    // mimics a v2-framed blob whose tag does not cover the entry id,
    // public key or framing.
    let key_buf: Zeroizing<[u8; 32]> = Zeroizing::new(key);
    let aead_key: &AesKey<Aes256Gcm> = AesKey::<Aes256Gcm>::from_slice(key_buf.as_slice());
    let cipher = Aes256Gcm::new(aead_key);
    let nonce = Aes256Gcm::generate_nonce(&mut AeadOsRng);
    let ct_no_aad = cipher.encrypt(&nonce, plaintext.as_slice()).unwrap();

    // Hand-assemble the v2 framing around it: magic, version, nonce,
    // then ciphertext+tag.
    let mut forged = vec![KEYSTORE_MAGIC, KEYSTORE_VERSION_V2];
    forged.extend_from_slice(nonce.as_slice());
    forged.extend_from_slice(&ct_no_aad);

    // The framing looks right ...
    assert_eq!(forged[0], KEYSTORE_MAGIC);
    assert_eq!(forged[1], KEYSTORE_VERSION_V2);

    // ... but the tag was computed without AAD, so decryption must fail.
    let outcome = decrypt_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &forged);
    assert!(outcome.is_err(),
        "ciphertext computed without AAD must fail to decrypt now that AAD is bound");
}
1878
/// When a v2-framed blob fails AEAD verification, the error returned by
/// `decrypt_from_disk` must be the v2 MAC failure, so the user is
/// pointed at the right remediation rather than at an unrelated
/// legacy-path error.
#[test]
fn dispatcher_surfaces_v2_error_on_corrupted_v2_blob() {
    let key = [7u8; 32];
    let mut blob =
        encrypt_for_disk_v2(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, b"hello").unwrap();

    // Corrupt the final byte (part of the 16-byte GCM tag). The framing
    // bytes are untouched, so the blob still classifies as v2.
    let tag_end = blob.len() - 1;
    blob[tag_end] ^= 0x01;

    let err =
        decrypt_from_disk(&key, TEST_ENTRY_ID, TEST_PUBLIC_KEY, &blob, &[]).unwrap_err();

    // Match on the v2-specific wording of the error text.
    assert!(
        err.contains("MAC verification failed"),
        "dispatcher must surface the v2 MAC error on corrupted v2 blob, got: {err}"
    );
}
1906
/// Backward compatibility: a ciphertext produced by `legacy_v1_encrypt`
/// must still decrypt through `decrypt_from_disk`, so keystores written
/// in the old format stay usable.
#[test]
fn legacy_v1_ciphertext_still_decrypts_via_dispatcher() {
    let key = [13u8; 32];
    let plaintext = b"pre-v0.10.3 keystore entry";

    let (legacy_blob, legacy_nonce) = legacy_v1_encrypt(&key, plaintext).unwrap();

    // The legacy output must not be mistaken for a v2-framed blob.
    assert!(is_legacy_v1(&legacy_blob),
        "legacy_v1_encrypt output must classify as legacy");

    // The entry id / public key arguments are required by the signature;
    // the shared test placeholders are passed here.
    let recovered = decrypt_from_disk(
        &key,
        TEST_ENTRY_ID,
        TEST_PUBLIC_KEY,
        &legacy_blob,
        &legacy_nonce,
    )
    .unwrap();

    assert_eq!(&*recovered, plaintext);
}
1930
/// End-to-end migration check: an entry stored in the legacy v1 format
/// is rewritten in v2 format on disk the first time `signer()` loads it,
/// and the rewritten entry is itself loadable afterwards.
#[test]
fn store_signer_migrates_legacy_entry_to_v2() {
    let (store, dir) = make_store();

    // Step 1: create a normal key, then read its entry back from disk.
    let info = store.generate(true).unwrap();
    let entry_path = store.entry_path(&info.id);
    let current: EncryptedEntry =
        serde_json::from_slice(&fs::read(&entry_path).unwrap()).unwrap();

    // Step 2: recover the secret and re-encrypt it in the legacy v1
    // format, simulating a keystore written by an older release.
    let secret = decrypt_from_disk(
        &store.machine_key,
        &current.id,
        &current.public_key,
        &current.enc_priv_key,
        &current.nonce,
    )
    .unwrap();
    let (legacy_blob, legacy_nonce) =
        legacy_v1_encrypt(&store.machine_key, &secret).unwrap();

    // Step 3: overwrite the on-disk entry with the legacy encoding; all
    // other fields are carried over unchanged.
    let downgraded = EncryptedEntry {
        id: current.id.clone(),
        algorithm: current.algorithm.clone(),
        created_at: current.created_at.clone(),
        public_key: current.public_key.clone(),
        enc_priv_key: legacy_blob,
        nonce: legacy_nonce,
        valid_until: current.valid_until.clone(),
        successor_key_id: current.successor_key_id.clone(),
    };
    fs::write(&entry_path, serde_json::to_vec_pretty(&downgraded).unwrap()).unwrap();

    // Step 4: load the signer through a brand-new Store so no cached
    // state can hide the on-disk change. This must succeed and is
    // expected to trigger the transparent rewrite to v2.
    let reopened = Store::open(&dir).unwrap();
    let _signer = reopened.signer(&info.id).unwrap();

    // Step 5: the file on disk must now carry v2 framing and an empty
    // legacy nonce field.
    let migrated: EncryptedEntry =
        serde_json::from_slice(&fs::read(&entry_path).unwrap()).unwrap();
    assert!(!is_legacy_v1(&migrated.enc_priv_key),
        "post-migration entry must be in v2 format");
    assert_eq!(migrated.enc_priv_key[0], KEYSTORE_MAGIC);
    assert_eq!(migrated.enc_priv_key[1], KEYSTORE_VERSION_V2);
    assert!(migrated.nonce.is_empty(),
        "v2 entries serialize an empty legacy nonce field");

    // Step 6: framing alone does not prove the rewritten ciphertext is
    // a working encryption of the right secret. Loading the signer once
    // more through yet another fresh Store fails loudly if the
    // migration wrote garbage.
    let reopened_again = Store::open(&dir).unwrap();
    let _signer = reopened_again
        .signer(&info.id)
        .expect("post-migration v2 decrypt works");

    cleanup(dir);
}
2000
/// A key generated by one `Store` instance can be loaded by a second
/// instance opened on the same directory, and the reloaded signer's
/// signatures verify against the originally reported public key.
#[test]
fn persist_and_reload() {
    use crate::attestation::{sign, Verifier};
    use crate::statements::ActionStatement;
    use ed25519_dalek::VerifyingKey;

    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();

    // Second Store instance over the same directory.
    let reopened = Store::open(&dir).unwrap();
    let signer = reopened.signer(&info.id).unwrap();
    assert_eq!(signer.key_id(), info.id);

    // Build a verifier that knows only the public key reported at
    // generation time.
    let pk_bytes: [u8; 32] = info.public_key.try_into().unwrap();
    let vk = VerifyingKey::from_bytes(&pk_bytes).unwrap();
    let mut verifier = Verifier::new(std::collections::HashMap::new());
    verifier.add_key(info.id.clone(), vk);

    // Sign a statement with the reloaded signer and verify the envelope.
    let stmt = ActionStatement::new("agent://test", "tool.call");
    let pt = crate::statements::payload_type("action");
    let signed = sign(&pt, &stmt, signer.as_ref()).unwrap();
    verifier.verify(&signed.envelope).unwrap();

    cleanup(dir);
}
2032
/// `list()` reports every generated key, and exactly one of them is
/// flagged as the default.
#[test]
fn list_keys() {
    let (store, dir) = make_store();
    store.generate(true).unwrap();
    store.generate(false).unwrap();

    let listed = store.list().unwrap();
    let default_count = listed.iter().filter(|k| k.is_default).count();

    assert_eq!(listed.len(), 2);
    assert_eq!(default_count, 1);

    cleanup(dir);
}
2044
/// Asking an empty store for its default signer is an error.
#[test]
fn no_default_key_errors() {
    let (store, dir) = make_store();

    let outcome = store.default_signer();
    assert!(outcome.is_err());

    cleanup(dir);
}
2051
/// Rotating the default key with `set_default = true` stamps the
/// predecessor (expiry + forward link, no longer default), mints a fresh
/// default successor, and the same metadata is visible through `list()`.
#[test]
fn rotate_mints_successor_and_links_predecessor() {
    let (store, dir) = make_store();

    let original = store.generate(true).unwrap();
    assert!(original.valid_until.is_none(), "fresh key has no expiry");
    assert!(original.successor_key_id.is_none(), "fresh key has no successor");

    let grace = std::time::Duration::from_secs(3600);
    let rotation = store.rotate(None, grace, true).unwrap();
    let successor_id = rotation.successor.id.as_str();

    // Predecessor side of the rotation.
    assert_eq!(rotation.predecessor.id, original.id);
    assert!(rotation.predecessor.valid_until.is_some(),
        "predecessor must get valid_until after rotation");
    assert_eq!(rotation.predecessor.successor_key_id.as_deref(),
        Some(successor_id),
        "predecessor must link forward to successor");
    assert!(!rotation.predecessor.is_default,
        "after rotation with set_default=true, predecessor is no longer default");

    // Successor side of the rotation.
    assert_ne!(rotation.successor.id, original.id);
    assert!(rotation.successor.valid_until.is_none(), "successor has no expiry yet");
    assert!(rotation.successor.successor_key_id.is_none(), "successor is chain head");
    assert!(rotation.successor.is_default, "successor is the new default");

    // The listing must agree with the rotation result.
    let listed = store.list().unwrap();
    assert_eq!(listed.len(), 2);
    let listed_pred = listed.iter().find(|k| k.id == original.id).unwrap();
    assert!(listed_pred.valid_until.is_some());
    assert_eq!(listed_pred.successor_key_id.as_deref(),
        Some(successor_id));

    cleanup(dir);
}
2089
/// Rotating with `set_default = false` creates the successor but leaves
/// the predecessor as the store's default key.
#[test]
fn rotate_with_set_default_false_keeps_predecessor_active() {
    let (store, dir) = make_store();
    let original = store.generate(true).unwrap();

    let grace = std::time::Duration::from_secs(3600);
    let rotation = store.rotate(None, grace, false).unwrap();

    assert!(rotation.predecessor.is_default);
    assert!(!rotation.successor.is_default);
    assert_eq!(store.default_key_id().unwrap(), original.id);

    cleanup(dir);
}
2106
/// After a rotation the predecessor key must still be loadable and able
/// to sign. Verifiers may refuse it on lifecycle grounds, but the
/// keystore itself must not destroy the material early.
#[test]
fn rotate_predecessor_signing_still_works_during_grace_window() {
    let (store, dir) = make_store();
    let original = store.generate(true).unwrap();

    let grace = std::time::Duration::from_secs(3600);
    let _ = store.rotate(None, grace, true).unwrap();

    let signer = store.signer(&original.id).unwrap();
    let message = crate::attestation::pae("text/plain", b"grace-window-payload");
    let signature = signer.sign(&message).unwrap();
    assert_eq!(signature.len(), 64);

    cleanup(dir);
}
2125
/// A key that already has a successor cannot be rotated a second time;
/// the caller is expected to rotate the head of the chain instead.
#[test]
fn rotate_refuses_to_rotate_already_rotated_key() {
    let (store, dir) = make_store();
    store.generate(true).unwrap();

    let grace = std::time::Duration::from_secs(60);
    let first = store.rotate(None, grace, true).unwrap();

    // Second attempt targets the already-stamped predecessor explicitly.
    let rejected = store
        .rotate(Some(&first.predecessor.id), grace, true)
        .unwrap_err();

    match rejected {
        KeyError::Crypto(msg) => assert!(
            msg.contains("already been rotated"),
            "error must explain why: {msg}"
        ),
        other => panic!("expected Crypto error, got {other:?}"),
    }

    cleanup(dir);
}
2150
/// `successor_chain` returns the rotation chain starting at the given
/// key and walking forward, whether it starts at the first key, in the
/// middle, or at the newest key.
#[test]
fn successor_chain_walks_forward() {
    let (store, dir) = make_store();
    let grace = std::time::Duration::from_secs(60);

    // Three generations: k0 -> first.successor -> second.successor.
    let k0 = store.generate(true).unwrap();
    let first = store.rotate(None, grace, true).unwrap();
    let second = store.rotate(None, grace, true).unwrap();

    let from_start = store.successor_chain(&k0.id).unwrap();
    assert_eq!(from_start, vec![k0.id.clone(), first.successor.id.clone(), second.successor.id.clone()],
        "chain must be ordered head -> tail");

    // Starting mid-chain drops everything before the starting key.
    let from_middle = store.successor_chain(&first.successor.id).unwrap();
    assert_eq!(from_middle, vec![first.successor.id.clone(), second.successor.id.clone()]);

    // Starting at the newest key yields only that key.
    let from_end = store.successor_chain(&second.successor.id).unwrap();
    assert_eq!(from_end, vec![second.successor.id.clone()]);

    cleanup(dir);
}
2176
/// `valid_keys_at` honours the grace window: right after a rotation with
/// a one-hour grace period both keys are valid, while two hours later
/// only the successor is.
#[test]
fn valid_keys_at_filters_by_grace_window() {
    let (store, dir) = make_store();
    let _ = store.generate(true).unwrap();

    let grace = std::time::Duration::from_secs(3600);
    let rotation = store.rotate(None, grace, true).unwrap();

    // Inside the grace window.
    let during_grace = store.valid_keys_at(unix_now()).unwrap();
    assert_eq!(during_grace.len(), 2, "both predecessor (in grace) and successor should be valid");

    // Two hours out: well past the one-hour grace period.
    let past_grace = store.valid_keys_at(unix_now() + 7200).unwrap();
    assert_eq!(past_grace.len(), 1,
        "after grace window only successor remains valid");
    assert_eq!(past_grace[0].id, rotation.successor.id);

    cleanup(dir);
}
2200
/// Regression: even if the manifest write FAILED (say, disk full at the
/// worst possible moment), the in-memory cache must reflect the stamped
/// predecessor that already landed on disk -- otherwise a same-process
/// retry would skip the already-rotated guard and mint a duplicate
/// successor.
///
/// A manifest-write failure cannot easily be injected mid-test, so this
/// test checks the precondition that makes the recovery work: after a
/// successful rotate(), the cache holds the stamped predecessor, so any
/// subsequent rotate of it is refused. Combined with the write order
/// (cache update BEFORE manifest write in rotate()), this shows a
/// manifest-write crash leaves the cache aligned with disk, not behind it.
///
/// Related scenario, exercised by
/// `rotated_predecessor_pointing_at_missing_successor_surfaces_clear_error`:
/// the successor key file is missing on disk (a prior rotate() crashed
/// AFTER stamping the predecessor but BEFORE writing the successor, or
/// an operator manually deleted the successor file mid-life). With the
/// successor-first write order a single-process crash cannot produce
/// that state, but retrying must still not be wedged; the recovery path
/// is to clear the predecessor's successor pointer (or restore the file
/// from backup) and try again.
#[test]
fn rotate_cache_reflects_stamped_predecessor_for_retry_safety() {
    let (store, dir) = make_store();
    let original = store.generate(true).unwrap();

    let grace = std::time::Duration::from_secs(60);
    let _ = store.rotate(None, grace, true).unwrap();

    // Same-process retry against the predecessor. With a stale cache
    // (predecessor still unstamped) this call would go through and mint
    // a duplicate successor; with a correct cache it must be refused.
    let rejected = store
        .rotate(Some(&original.id), grace, true)
        .unwrap_err();

    match rejected {
        KeyError::Crypto(msg) => assert!(
            msg.contains("already been rotated"),
            "cache should reflect stamped predecessor; got: {msg}"
        ),
        other => panic!("expected Crypto error, got {other:?}"),
    }

    cleanup(dir);
}
2249
/// If a rotated predecessor points at a successor whose key file has
/// been deleted, walking the chain from a cold cache must fail with
/// `KeyError::NotFound` naming the missing successor.
#[test]
fn rotated_predecessor_pointing_at_missing_successor_surfaces_clear_error() {
    let (store, dir) = make_store();
    store.generate(true).unwrap();

    let grace = std::time::Duration::from_secs(60);
    let rotation = store.rotate(None, grace, true).unwrap();

    // Simulate an operator deleting the successor's key file while the
    // predecessor still references it.
    let successor_file = store.entry_path(&rotation.successor.id);
    fs::remove_file(&successor_file).unwrap();

    // A fresh Store has an empty cache, so the chain walk has to consult
    // the disk and hit the missing file. The outcome must be a NotFound
    // error: no panic, no endless loop.
    let cold_store = Store::open(&dir).unwrap();
    let failure = cold_store
        .successor_chain(&rotation.predecessor.id)
        .unwrap_err();

    match failure {
        KeyError::NotFound(id) => assert_eq!(id, rotation.successor.id),
        other => panic!("expected NotFound error, got {other:?}"),
    }

    cleanup(dir);
}
2277
/// Entry files written before 0.9.5 carry neither `valid_until` nor
/// `successor_key_id`. Such files must still deserialize, show up in
/// `list()` with both fields as `None`, and be usable through
/// `default_signer()`.
#[test]
fn legacy_entry_without_lifecycle_fields_loads() {
    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();

    // Rewrite the on-disk entry with the lifecycle fields stripped, as
    // an older CLI would have written it.
    let entry_file = store.entry_path(&info.id);
    let mut json: serde_json::Value =
        serde_json::from_slice(&fs::read(&entry_file).unwrap()).unwrap();
    {
        let fields = json.as_object_mut().unwrap();
        fields.remove("valid_until");
        fields.remove("successor_key_id");
    }
    fs::write(&entry_file, serde_json::to_vec_pretty(&json).unwrap()).unwrap();

    // Cold cache: a fresh Store must load the stripped entry.
    let cold_store = Store::open(&dir).unwrap();
    let listed = cold_store.list().unwrap();
    assert_eq!(listed.len(), 1);
    assert!(listed[0].valid_until.is_none(),
        "missing valid_until must default to None on legacy entry");
    assert!(listed[0].successor_key_id.is_none(),
        "missing successor_key_id must default to None on legacy entry");

    let signer = cold_store.default_signer().unwrap();
    assert_eq!(signer.key_id(), info.id);

    cleanup(dir);
}
2310
2311 // --- keystore permission hardening (PR 1) -------------------------------
2312
// The perm tests below mutate the process-global env var
// TREESHIP_ALLOW_INSECURE_KEY_PERMS. cargo test runs cases in parallel
// by default, so without serialization one test can set the bypass
// while another expects it unset, and the latter fails spuriously.
// Every test that reads or writes the variable holds this mutex for its
// whole body; everything else in the file remains parallel-safe.
static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
2320
/// A key file written by `generate` must have permission bits 0600.
#[test]
#[cfg(unix)]
fn write_entry_creates_file_with_0600() {
    use std::os::unix::fs::PermissionsExt;

    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();

    let metadata = fs::metadata(store.entry_path(&info.id)).unwrap();
    // Mask off the file-type bits; only the permission bits matter.
    let mode = metadata.permissions().mode() & 0o777;
    assert_eq!(mode, 0o600, "freshly written key file must be 0600, got {:o}", mode);

    cleanup(dir);
}
2335
/// With the bypass variable unset, `signer()` must refuse a key file
/// whose permissions were loosened to 0644 and report the offending
/// path and mode via `KeyError::InsecureKeyPerms`.
#[test]
#[cfg(unix)]
fn signer_refuses_world_readable_key() {
    use std::os::unix::fs::PermissionsExt;

    // Hold the env lock for the whole test so no sibling test toggles
    // the bypass variable mid-flight; a poisoned lock is still usable.
    let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
    // Do not inherit a bypass setting from the host environment.
    std::env::remove_var("TREESHIP_ALLOW_INSECURE_KEY_PERMS");

    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();

    // Loosen the key file's permissions, as a careless checkout, scp or
    // shared-volume mount might.
    let key_file = store.entry_path(&info.id);
    fs::set_permissions(&key_file, fs::Permissions::from_mode(0o644)).unwrap();

    match store.signer(&info.id) {
        Err(KeyError::InsecureKeyPerms { path: p, mode }) => {
            assert_eq!(p, key_file);
            assert_eq!(mode & 0o777, 0o644);
        }
        other => panic!("expected InsecureKeyPerms, got {:?}", other.map(|_| "ok")),
    }

    cleanup(dir);
}
2363
/// Setting TREESHIP_ALLOW_INSECURE_KEY_PERMS=1 lets `signer()` load a
/// key file even though its permissions are 0644.
#[test]
#[cfg(unix)]
fn signer_bypass_via_env_var() {
    use std::os::unix::fs::PermissionsExt;

    // Serialize against the other tests that touch the bypass variable.
    let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());

    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();
    let key_file = store.entry_path(&info.id);
    fs::set_permissions(&key_file, fs::Permissions::from_mode(0o644)).unwrap();

    // Keep the variable set only for the duration of the call under
    // test, and clear it before asserting so a failed assertion cannot
    // leak the bypass into later tests.
    std::env::set_var("TREESHIP_ALLOW_INSECURE_KEY_PERMS", "1");
    let outcome = store.signer(&info.id);
    std::env::remove_var("TREESHIP_ALLOW_INSECURE_KEY_PERMS");

    assert!(
        outcome.is_ok(),
        "bypass env var must allow signing: {:?}",
        outcome.err()
    );

    cleanup(dir);
}
2385
2386 // --- v0.10.4 P2: TOCTOU window in signer() perm-check ---------------
2387
/// Regression guard for the `signer()` permission gate (v0.10.4 P2).
///
/// The fix this test accompanies is described as replacing a
/// check-by-path followed by a separate read-by-path with
/// `read_entry_with_perm_check`, which is meant to open the key file once
/// and validate permissions on that same descriptor. A real filesystem
/// race cannot be staged reliably in a unit test, so this test pins the
/// observable half of the contract instead:
///
/// 1. Rewrite the key file with its own, still-valid envelope bytes and
///    loosen its mode to 0o644 -- the state an attacker's swapped-in file
///    would be in.
/// 2. `signer()` must fail with `InsecureKeyPerms`, reporting the key
///    file's path and the 0o644 mode.
/// 3. `read_entry_with_perm_check` called directly must fail the same way
///    instead of handing back an entry.
///
/// What this cannot prove is that no second, path-based open exists; that
/// structural property still has to be enforced by code review.
#[test]
#[cfg(unix)]
fn signer_rejects_post_check_swap() {
    use std::os::unix::fs::PermissionsExt;

    let _env_guard = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
    std::env::remove_var("TREESHIP_ALLOW_INSECURE_KEY_PERMS");

    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();
    let key_file = store.entry_path(&info.id);

    // Keep a copy of the legitimate envelope. Re-using it for the
    // "swapped" file means the JSON is well-formed and would decrypt, so
    // the only thing wrong with the file is its mode.
    let envelope = fs::read(&key_file).unwrap();
    assert!(!envelope.is_empty(), "test sanity");

    // Stage the swapped-in file: identical content, loose permissions.
    fs::write(&key_file, &envelope).unwrap();
    fs::set_permissions(&key_file, fs::Permissions::from_mode(0o644)).unwrap();

    // Public entry point: must be rejected by the permission gate.
    match store.signer(&info.id) {
        Ok(_) => panic!(
            "expected InsecureKeyPerms from single-open fstat gate, got ok signer"
        ),
        Err(KeyError::InsecureKeyPerms { path: reported, mode }) => {
            assert_eq!(reported, key_file);
            assert_eq!(mode & 0o777, 0o644);
        }
        Err(unexpected) => panic!(
            "expected InsecureKeyPerms from single-open fstat gate, got {:?}",
            unexpected
        ),
    }

    // Helper invoked directly: it must reject as well, never yielding an
    // `EncryptedEntry` for a loose-perms file.
    let direct = store.read_entry_with_perm_check(&info.id);
    let rejected = matches!(direct, Err(KeyError::InsecureKeyPerms { .. }));
    assert!(
        rejected,
        "read_entry_with_perm_check must reject before reading bytes; got {:?}",
        direct.map(|_| "ok")
    );

    cleanup(dir);
}
2462
2463 // --- TS-2026-001 H3 migration-lock concurrency test -----------------
2464
/// H3: two threads that call `Store::signer` on the same legacy v1 entry
/// must both succeed, the entry on disk must end up in v2 format and
/// decrypt through the v2 path, and the keystore directory must contain
/// no leftover `.tmp` file.
///
/// Rationale carried over from the advisory: without a lock around
/// `migrate_entry_to_v2`, concurrent migrators race the
/// read-modify-rename cycle, and a reader could observe one writer's v2
/// entry only to have it replaced by the other's. The lock is intended to
/// turn that race into a queue, with the second migrator re-checking
/// after acquiring the lock and finding the entry already migrated.
///
/// Each thread opens its own `Store` on the shared directory; per the
/// original author's note, sharing one `Store` would serialize through
/// its in-memory cache and bypass the on-disk lock this test targets.
#[test]
fn concurrent_migration_serializes_correctly() {
    use std::thread;

    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();
    let entry_path = store.entry_path(&info.id);

    // Downgrade the freshly generated v2 entry to the legacy v1 framing:
    // recover the secret, re-encrypt it the v1 way, and write the entry
    // back with only the ciphertext and nonce replaced.
    let v2_entry: EncryptedEntry =
        serde_json::from_slice(&fs::read(&entry_path).unwrap()).unwrap();
    let secret = decrypt_from_disk(
        &store.machine_key,
        &v2_entry.id,
        &v2_entry.public_key,
        &v2_entry.enc_priv_key,
        &v2_entry.nonce,
    )
    .unwrap();
    let (legacy_blob, legacy_nonce) =
        legacy_v1_encrypt(&store.machine_key, &secret).unwrap();
    let mut legacy_entry = v2_entry.clone();
    legacy_entry.enc_priv_key = legacy_blob;
    legacy_entry.nonce = legacy_nonce;
    fs::write(&entry_path, serde_json::to_vec_pretty(&legacy_entry).unwrap()).unwrap();

    // Give each thread its own copy of the directory and key id so the
    // closures can be `'static`.
    let (dir_a, dir_b) = (dir.clone(), dir.clone());
    let (id_a, id_b) = (info.id.clone(), info.id.clone());

    let first = thread::spawn(move || -> Result<(), String> {
        let local_store = Store::open(&dir_a).map_err(|e| e.to_string())?;
        local_store.signer(&id_a).map_err(|e| e.to_string())?;
        Ok(())
    });
    let second = thread::spawn(move || -> Result<(), String> {
        let local_store = Store::open(&dir_b).map_err(|e| e.to_string())?;
        local_store.signer(&id_b).map_err(|e| e.to_string())?;
        Ok(())
    });

    first.join().unwrap().expect("thread 1 signer load must succeed");
    second.join().unwrap().expect("thread 2 signer load must succeed");

    // Whatever order the threads ran in, the entry on disk must now carry
    // the v2 framing bytes.
    let migrated: EncryptedEntry =
        serde_json::from_slice(&fs::read(&entry_path).unwrap()).unwrap();
    assert!(
        !is_legacy_v1(&migrated.enc_priv_key),
        "post-concurrent-migration entry must be in v2 format"
    );
    assert_eq!(migrated.enc_priv_key[0], KEYSTORE_MAGIC);
    assert_eq!(migrated.enc_priv_key[1], KEYSTORE_VERSION_V2);

    // Decrypt using the migrated entry's own id and public key: if the
    // migration had not bound those into the ciphertext, this is where a
    // MAC failure would surface.
    let plaintext = decrypt_v2(
        &store.machine_key,
        &migrated.id,
        &migrated.public_key,
        &migrated.enc_priv_key,
    )
    .expect("v2 entry must decrypt cleanly after concurrent migration");
    assert_eq!(plaintext.len(), 32, "decrypted secret must be a 32-byte ed25519 scalar");

    // Neither migrator may leave a staging file behind.
    for dir_entry in fs::read_dir(&dir).unwrap() {
        let p = dir_entry.unwrap().path();
        let is_tmp = p.extension().is_some_and(|ext| ext == "tmp");
        assert!(
            !is_tmp,
            "no .tmp fragment must remain after migration, found: {}",
            p.display()
        );
    }

    cleanup(dir);
}
2574
2575 // --- TS-2026-001 H1 + H2 atomic write tests ------------------------
2576
/// H1: a `write_file_600` call that fails partway MUST leave a
/// pre-existing target file intact.
///
/// The failure is provoked by making the keystore directory read-only
/// (0o500) after the original key entry is in place and then asking
/// `write_file_600` to overwrite that entry. The write must return an
/// error, the entry must be byte-identical afterwards, a fresh `Store`
/// must still be able to sign with it, and no `.tmp` sibling may remain.
///
/// NOTE(review): the advisory describes this as exercising the final
/// `fs::rename`; if `write_file_600` stages its tmp file inside the same
/// directory, the failure will actually occur earlier, at tmp-file
/// creation. Either way the invariant checked here is the same.
///
/// Users that bypass directory permission checks (typically root inside a
/// CI container) cannot provoke the failure at all, so the test probes for
/// that and skips instead of overwriting the entry and failing spuriously.
#[test]
#[cfg(unix)]
fn atomic_write_leaves_original_intact_on_partial_failure() {
    use std::os::unix::fs::PermissionsExt;
    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();
    let entry_path = store.entry_path(&info.id);

    // Capture the original bytes for byte-identity comparison.
    let original = fs::read(&entry_path).expect("entry file must exist");
    assert!(!original.is_empty(), "freshly generated entry must be non-empty");

    // Lock the directory: read+execute only, no write.
    let orig_dir_mode = fs::metadata(&dir).unwrap().permissions().mode() & 0o777;
    fs::set_permissions(&dir, fs::Permissions::from_mode(0o500)).unwrap();

    // Probe: if we can still create a file next to the entry, directory
    // permissions are not enforced for this user and the failure path is
    // unreachable. Undo our changes and skip.
    let probe = entry_path.with_extension("probe");
    if fs::File::create(&probe).is_ok() {
        let _ = fs::remove_file(&probe);
        fs::set_permissions(&dir, fs::Permissions::from_mode(orig_dir_mode)).unwrap();
        eprintln!(
            "skipping atomic_write_leaves_original_intact_on_partial_failure: \
             directory permissions are not enforced for this user"
        );
        cleanup(dir);
        return;
    }

    // Attempt a fresh write to the SAME path; it must fail because the
    // directory is read-only.
    let res = write_file_600(&entry_path, b"new junk that must not land");

    // Restore the directory mode BEFORE asserting anything, so a failing
    // assertion cannot strand a read-only directory that neither
    // `cleanup` nor the OS temp reaper can remove.
    fs::set_permissions(&dir, fs::Permissions::from_mode(orig_dir_mode)).unwrap();

    assert!(res.is_err(), "write_file_600 must fail when dir is read-only");

    // The original key file must be byte-identical to what we captured
    // before the failed write.
    let after = fs::read(&entry_path).expect("entry file must still exist after failed write");
    assert_eq!(
        after, original,
        "failed atomic write must not corrupt the original file",
    );

    // And the keystore must still produce a working signer from it.
    let store2 = Store::open(&dir).unwrap();
    let signer = store2
        .signer(&info.id)
        .expect("original key must still decrypt after a failed write");
    let pae = crate::attestation::pae("text/plain", b"survive");
    assert_eq!(signer.sign(&pae).unwrap().len(), 64);

    // No stale tmp file left behind.
    let tmp = entry_path.with_extension("tmp");
    assert!(!tmp.exists(), "tmp file must be cleaned up after rename failure");

    cleanup(dir);
}
2637
/// H2: a freshly generated entry file has mode 0o600, and the atomic
/// write leaves no `.tmp` sibling behind.
///
/// The advisory fix is described as setting the mode when the file is
/// created (via `OpenOptionsExt::mode`) instead of tightening it with a
/// later `set_permissions`, which left a short window of looser
/// permissions. This test can only observe the end state -- the mode
/// after `generate` returns -- not the absence of that window.
#[test]
#[cfg(unix)]
fn mode_is_600_at_creation() {
    use std::os::unix::fs::PermissionsExt;

    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();
    let entry_path = store.entry_path(&info.id);

    // Compare permission bits only; mask off the file-type bits.
    let perms = fs::metadata(&entry_path).unwrap().permissions();
    let mode = perms.mode() & 0o777;
    assert_eq!(mode, 0o600, "entry file must be 0600 at creation, got {:o}", mode);

    // The staging file must be gone once the write has completed.
    let leftover = entry_path.with_extension("tmp");
    assert!(
        !leftover.exists(),
        "no .tmp file must be left behind after a successful atomic write"
    );

    cleanup(dir);
}
2661
/// `Store::fix_perms` must tighten a loosened keystore directory back to
/// 0o700 and a loosened key file back to 0o600, report both paths in its
/// change list, and leave the key usable for signing.
#[test]
#[cfg(unix)]
fn fix_perms_repairs_loose_modes() {
    use std::os::unix::fs::PermissionsExt;

    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();
    let key_path = store.entry_path(&info.id);

    // Loosen both the directory and the key file.
    fs::set_permissions(&dir, fs::Permissions::from_mode(0o755)).unwrap();
    fs::set_permissions(&key_path, fs::Permissions::from_mode(0o644)).unwrap();

    let changes = store.fix_perms().unwrap();

    // Only the two paths loosened above are asserted on. The manifest may
    // or may not appear in the list depending on the mode it was written
    // with, so the total count is deliberately not checked.
    let dir_reported = changes.iter().any(|(p, _, _)| p == &dir);
    let key_reported = changes.iter().any(|(p, _, _)| p == &key_path);
    assert!(dir_reported, "dir should be repaired");
    assert!(key_reported, "key file should be repaired");

    // The modes on disk must match what the report claims was fixed.
    let dir_mode = fs::metadata(&dir).unwrap().permissions().mode() & 0o777;
    assert_eq!(dir_mode, 0o700);
    let key_mode = fs::metadata(&key_path).unwrap().permissions().mode() & 0o777;
    assert_eq!(key_mode, 0o600);

    // With permissions repaired, the signer's permission gate must pass.
    store.signer(&info.id).expect("signing must work after fix_perms");

    cleanup(dir);
}
2695
2696 // --- TS-2026-001 post-merge fix-up: entry-binding AAD ------------------
2697
/// Regression guard for the entry-binding AAD added after the
/// TS-2026-001 merge.
///
/// Threat being pinned: someone with write access to the keys directory
/// copies entry B's `enc_priv_key` blob into entry A's JSON envelope. If
/// the ciphertext were not bound to the entry's id and public key, the
/// decrypt would succeed under the shared machine key and a signer
/// advertised as key A would sign with key B's secret.
///
/// The test performs that exact swap on disk and requires `signer()` to
/// fail with a `KeyError::Crypto` whose message mentions the MAC
/// failure -- not some unrelated error that would hide the bug class.
#[test]
fn cross_entry_swap_fails_decryption() {
    let (store, dir) = make_store();

    // Two unrelated keys living in one store under one machine key.
    let a = store.generate(true).unwrap();
    let b = store.generate(false).unwrap();

    // Load both envelopes as they sit on disk.
    let path_a = store.entry_path(&a.id);
    let path_b = store.entry_path(&b.id);
    let entry_a: EncryptedEntry =
        serde_json::from_slice(&fs::read(&path_a).unwrap()).unwrap();
    let entry_b: EncryptedEntry =
        serde_json::from_slice(&fs::read(&path_b).unwrap()).unwrap();

    // Preconditions: both blobs carry the v2 framing bytes, and they are
    // genuinely different ciphertexts.
    for envelope in [&entry_a, &entry_b] {
        assert_eq!(envelope.enc_priv_key[0], KEYSTORE_MAGIC);
        assert_eq!(envelope.enc_priv_key[1], KEYSTORE_VERSION_V2);
    }
    assert_ne!(
        entry_a.enc_priv_key, entry_b.enc_priv_key,
        "two freshly-generated entries must have distinct ciphertexts"
    );

    // The attack: A's envelope (id, public_key, algorithm untouched)
    // carrying B's ciphertext. Per the original author's note, the v2
    // nonce is stored inline in the blob (bytes [2..14]) and the JSON
    // `nonce` field is empty for v2 entries, so replacing the blob
    // replaces the nonce too.
    let mut tampered_a = entry_a.clone();
    tampered_a.enc_priv_key = entry_b.enc_priv_key.clone();
    fs::write(&path_a, serde_json::to_vec_pretty(&tampered_a).unwrap()).unwrap();

    // Open a new Store so nothing cached in memory can mask the tamper.
    let store2 = Store::open(&dir).unwrap();
    let err = store2.signer(&a.id).err().unwrap_or_else(|| {
        panic!(
            "swapping B's ciphertext into A's envelope must fail decrypt; \
             got Ok which means the signer would silently sign with key B"
        )
    });

    // Insist on the crypto/MAC failure specifically; a NotFound or
    // InsecureKeyPerms here would mean the swap was never evaluated.
    if let KeyError::Crypto(msg) = &err {
        assert!(
            msg.contains("MAC verification failed"),
            "swap must surface MAC failure; got: {msg}"
        );
    } else {
        panic!("expected Crypto MAC error, got: {err:?}");
    }

    cleanup(dir);
}
2771
/// Companion to `cross_entry_swap_fails_decryption`: the entry id is
/// expected to be bound into the ciphertext as well, so editing the JSON
/// `id` while leaving the ciphertext alone must make decryption fail.
/// (An attacker who drops a stolen entry file under a different id
/// without re-encrypting would land here.)
///
/// The forged entry is written to the ORIGINAL file path and read back,
/// so the values fed to `decrypt_from_disk` are exactly what is on disk.
/// `decrypt_from_disk` is called directly instead of `Store::signer`
/// because no entry file exists at the path derived from the forged id;
/// going through `signer` would only exercise a lookup failure.
///
/// A positive control (same machine key, same on-disk ciphertext, the
/// genuine id) runs first so the failure under the forged id can only be
/// attributed to the id change.
#[test]
fn aad_tampered_entry_id_fails_decryption() {
    const FORGED_ID: &str = "key_attacker_substituted_id";

    let (store, dir) = make_store();
    let info = store.generate(true).unwrap();
    let path = store.entry_path(&info.id);

    let mut entry: EncryptedEntry =
        serde_json::from_slice(&fs::read(&path).unwrap()).unwrap();
    assert_eq!(entry.id, info.id, "sanity: id matches what generate returned");

    // Forge the id and write the envelope back to the same file.
    entry.id = FORGED_ID.to_string();
    fs::write(&path, serde_json::to_vec_pretty(&entry).unwrap()).unwrap();

    // Fresh Store: its machine key is obtained independently of `store`.
    let store2 = Store::open(&dir).unwrap();

    // Read the tampered envelope back so the decrypt inputs are what is
    // actually on disk, not just our in-memory copy.
    let on_disk: EncryptedEntry =
        serde_json::from_slice(&fs::read(&path).unwrap()).unwrap();
    assert_eq!(on_disk.id, FORGED_ID, "sanity: forged id landed on disk");

    // Positive control: with the genuine id, the very same ciphertext
    // must still decrypt under store2's machine key.
    let _ = decrypt_from_disk(
        &store2.machine_key,
        &info.id,
        &on_disk.public_key,
        &on_disk.enc_priv_key,
        &on_disk.nonce,
    )
    .expect("control: ciphertext must decrypt under its genuine id");

    // The check itself: identical inputs except for the forged id.
    let result = decrypt_from_disk(
        &store2.machine_key,
        &on_disk.id,         // tampered id
        &on_disk.public_key, // original pubkey
        &on_disk.enc_priv_key,
        &on_disk.nonce,
    );
    assert!(
        result.is_err(),
        "AAD-bound entry id mismatch must fail decrypt; got Ok"
    );

    cleanup(dir);
}
2819}