Skip to main content

sochdb_storage/
keyring.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4
5//! # Keyring — KEK/DEK envelope for data-at-rest encryption (Task 3B)
6//!
7//! The keyring is the per-database key-management substrate that sits in front
8//! of [`crate::encryption::EncryptionEngine`]. It exists so that:
9//!
10//! - The operator-supplied secret (the **KEK**, e.g. `SOCHDB_ENCRYPTION_KEY` or
11//!   an embedded `ConnectionConfig.encryption` key) is **never** used verbatim
12//!   as the cipher key. Instead a random per-DB **DEK** is generated, and the
13//!   KEK only wraps it. Rotating the KEK is then a cheap re-wrap — it does NOT
14//!   require re-encrypting any data.
15//! - A wrong or missing key is caught **fail-closed at open**, before a single
16//!   WAL byte is read, via an authenticated descriptor MAC and a DEK canary —
17//!   never silently degrading to a plaintext read or an "empty" database.
18//! - A `key_epoch` is reserved from the first encrypted byte, so future DEK
19//!   rotation is expressible on disk without a format break.
20//!
21//! ## On-disk descriptor (`<db_dir>/keyring.json`)
22//!
23//! The **presence** of this file with `encrypted=true` is the source of truth
24//! that a database is encrypted. A plaintext DB has no keyring file at all
25//! (preserving byte-compatibility with pre-3B binaries). All byte fields are
//! hex-encoded. The whole descriptor is authenticated by `mac` =
//! HMAC-SHA256(HKDF(KEK, salt, "sochdb/keyring/mac/v1"), canonical-fields), so
//! an attacker or a bad rollback cannot flip `encrypted` to `false` to force a
//! downgrade.
29//!
30//! ```text
31//! { format_version, encrypted, db_uuid, kek_source_id, key_epoch,
32//!   salt, wrapped_dek, canary, mac }
33//! ```
34//!
//! - `wrapped_dek` = AEAD(HKDF(KEK,salt,"sochdb/keyring/wrap/v1")).encrypt(DEK, aad=wrap_aad)
36//! - `canary`      = AEAD(DEK).encrypt(CANARY_TOKEN, aad=canary_aad)
37//!
38//! On open we (1) verify the MAC with the KEK, (2) unwrap the DEK, (3) decrypt
39//! the canary with the DEK. Any failure ⇒ hard error (wrong/missing KEK or
40//! tampering). Only after all three pass is the WAL touched.
41
42use std::fs;
43use std::io::Write as _;
44use std::path::{Path, PathBuf};
45use std::sync::Arc;
46
47use hmac::{Hmac, Mac};
48use sha2::Sha256;
49
50use crate::encryption::{EncryptionEngine, EncryptionKey, derive_subkey, generate_key};
51use sochdb_core::{Result, SochDBError};
52
/// HMAC instantiated with SHA-256; used to authenticate the keyring descriptor.
type HmacSha256 = Hmac<Sha256>;

/// Current keyring descriptor format version.
const KEYRING_FORMAT_VERSION: u32 = 1;
/// Keyring file name within the database directory.
pub const KEYRING_FILE_NAME: &str = "keyring.json";
/// Fixed plaintext token sealed under the DEK to detect a wrong key at open.
const CANARY_TOKEN: &[u8] = b"sochdb-keyring-canary-v1";
// HKDF info labels — keep these stable; they are part of the on-disk contract.
/// Info label for deriving the DEK-wrapping key from the KEK.
const INFO_WRAP: &[u8] = b"sochdb/keyring/wrap/v1";
/// Info label for deriving the descriptor-MAC key from the KEK.
const INFO_MAC: &[u8] = b"sochdb/keyring/mac/v1";
64
/// The resolved encryption state for an opened database.
///
/// `Plaintext` carries a disabled engine (identity passthrough, byte-identical
/// legacy frames). `Encrypted` carries the live DEK-backed engine plus the
/// `db_uuid` and `key_epoch` that the WAL binds into every record's AAD.
///
/// `Debug` is implemented by hand so that key material is never printed.
pub enum EncryptionState {
    /// No at-rest encryption: the accessors yield a disabled engine, an
    /// all-zero `db_uuid` and epoch 0.
    Plaintext,
    /// At-rest encryption is active; the payload holds the engine, database
    /// identity and key epoch.
    Encrypted(ActiveEncryption),
}
74
75impl std::fmt::Debug for EncryptionState {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        // Deliberately does NOT print key material.
78        match self {
79            EncryptionState::Plaintext => write!(f, "EncryptionState::Plaintext"),
80            EncryptionState::Encrypted(a) => write!(
81                f,
82                "EncryptionState::Encrypted {{ db_uuid: {}, key_epoch: {} }}",
83                hex::encode(a.db_uuid),
84                a.key_epoch
85            ),
86        }
87    }
88}
89
90impl EncryptionState {
91    /// Whether at-rest encryption is active for this database.
92    pub fn is_encrypted(&self) -> bool {
93        matches!(self, EncryptionState::Encrypted(_))
94    }
95
96    /// The engine to hand to the WAL (disabled passthrough if plaintext).
97    pub fn engine(&self) -> Arc<EncryptionEngine> {
98        match self {
99            EncryptionState::Plaintext => Arc::new(EncryptionEngine::disabled()),
100            EncryptionState::Encrypted(a) => a.engine.clone(),
101        }
102    }
103
104    /// 16-byte DB identity bound into WAL AAD (all-zero for plaintext, unused).
105    pub fn db_uuid(&self) -> [u8; 16] {
106        match self {
107            EncryptionState::Plaintext => [0u8; 16],
108            EncryptionState::Encrypted(a) => a.db_uuid,
109        }
110    }
111
112    /// Active DEK epoch (0 for plaintext, unused).
113    pub fn key_epoch(&self) -> u32 {
114        match self {
115            EncryptionState::Plaintext => 0,
116            EncryptionState::Encrypted(a) => a.key_epoch,
117        }
118    }
119}
120
/// Live encryption context for an encrypted database.
///
/// Built by the keyring once the DEK has been generated (new database) or
/// unwrapped and canary-checked (existing database).
pub struct ActiveEncryption {
    /// Engine keyed with the database's DEK (not the operator-supplied KEK).
    pub engine: Arc<EncryptionEngine>,
    /// 16-byte database identity; also part of the wrap and canary AAD.
    pub db_uuid: [u8; 16],
    /// Epoch of the DEK held by `engine`.
    pub key_epoch: u32,
}
127
/// On-disk keyring descriptor (hex-encoded byte fields).
///
/// Serialized as JSON. Every field except `mac` is fed into the descriptor
/// MAC, so none of them can be altered without the KEK.
#[derive(serde::Serialize, serde::Deserialize)]
struct KeyringFile {
    /// Descriptor layout version; must equal `KEYRING_FORMAT_VERSION` on open.
    format_version: u32,
    /// Whether the database is encrypted. Current code only ever writes `true`.
    encrypted: bool,
    /// Hex of the 16-byte database identity.
    db_uuid: String,
    /// Caller-supplied `source_id` for the KEK; bound into the wrap AAD.
    kek_source_id: String,
    /// Epoch of the wrapped DEK.
    key_epoch: u32,
    /// Hex of the 16-byte salt used when deriving the wrap and MAC subkeys.
    salt: String,
    /// Hex of the DEK sealed under the KEK-derived wrapping key.
    wrapped_dek: String,
    /// Hex of `CANARY_TOKEN` sealed under the DEK.
    canary: String,
    /// Hex of the HMAC-SHA256 tag over the canonical encoding of the fields above.
    mac: String,
}
141
142/// Resolve the encryption state for a database directory.
143///
144/// Contract (the file's presence + `encrypted` flag is the source of truth):
145/// - **keyring present, `encrypted=true`**: `kek` is REQUIRED. We verify the
146///   descriptor MAC, unwrap the DEK, and check the canary. Any failure (wrong
147///   key, missing key, tamper) is a hard, fail-closed error — we never fall
148///   back to a disabled engine. Returns `Encrypted`.
149/// - **keyring present, `encrypted=false`**: plaintext DB. Returns `Plaintext`.
150/// - **keyring absent + `kek = Some`**: a *new* encrypted DB. Generates a DEK,
151///   wraps it, writes the keyring atomically. `allow_create` MUST be true (the
152///   caller asserts this is not an existing plaintext DB). Returns `Encrypted`.
153/// - **keyring absent + `kek = None`**: legacy/plaintext DB. Returns `Plaintext`.
154pub fn load_or_init(
155    db_dir: &Path,
156    kek: Option<&EncryptionKey>,
157    source_id: &str,
158    allow_create: bool,
159) -> Result<EncryptionState> {
160    let path = keyring_path(db_dir);
161
162    if path.exists() {
163        let file: KeyringFile = read_keyring(&path)?;
164        if file.format_version != KEYRING_FORMAT_VERSION {
165            return Err(SochDBError::Encryption(format!(
166                "unsupported keyring format version {} (expected {})",
167                file.format_version, KEYRING_FORMAT_VERSION
168            )));
169        }
170        // A keyring file is ONLY ever written for an encrypted database, so its
171        // mere presence means a KEK is required. Resolve it fail-closed BEFORE
172        // honoring the `encrypted` flag — otherwise an attacker could flip
173        // `encrypted` to false (or drop the env key) to force a plaintext
174        // downgrade. The descriptor MAC (verified inside `open_encrypted`, and
175        // re-checked below for the plaintext-marker case) cannot be recomputed
176        // without the KEK, so a forged `encrypted=false` is rejected.
177        let kek = kek.ok_or_else(|| {
178            SochDBError::Encryption(
179                "database has a keyring (encryption configured) but no \
180                 encryption key was provided (set the KEK, e.g. \
181                 SOCHDB_ENCRYPTION_KEY); refusing to open"
182                    .to_string(),
183            )
184        })?;
185        // Authenticate the descriptor with the KEK first. This rejects a forged
186        // `encrypted=false` downgrade, since the MAC covers the `encrypted`
187        // field and cannot be reforged without the KEK.
188        verify_mac(&file, kek)?;
189        if !file.encrypted {
190            // MAC-authenticated plaintext marker (not written by current code,
191            // but honored if it ever authenticates — defensive forward-compat).
192            return Ok(EncryptionState::Plaintext);
193        }
194        open_encrypted(file, kek)
195    } else if let Some(kek) = kek {
196        if !allow_create {
197            return Err(SochDBError::Encryption(
198                "an encryption key was provided for a database that has no \
199                 keyring (existing plaintext data must be migrated explicitly, \
200                 not encrypted in place); refusing to open"
201                    .to_string(),
202            ));
203        }
204        create_encrypted(db_dir, &path, kek, source_id)
205    } else {
206        Ok(EncryptionState::Plaintext)
207    }
208}
209
210fn keyring_path(db_dir: &Path) -> PathBuf {
211    db_dir.join(KEYRING_FILE_NAME)
212}
213
214fn read_keyring(path: &Path) -> Result<KeyringFile> {
215    let bytes = fs::read(path)?;
216    serde_json::from_slice(&bytes)
217        .map_err(|e| SochDBError::Encryption(format!("malformed keyring: {e}")))
218}
219
220/// Build the canonical, deterministic byte string the MAC authenticates.
221/// Length-prefixed fields so no two distinct descriptors collide.
222fn mac_input(file: &KeyringFile) -> Vec<u8> {
223    let mut out = Vec::new();
224    let mut push = |b: &[u8]| {
225        out.extend_from_slice(&(b.len() as u32).to_le_bytes());
226        out.extend_from_slice(b);
227    };
228    push(&file.format_version.to_le_bytes());
229    push(&[file.encrypted as u8]);
230    push(file.db_uuid.as_bytes());
231    push(file.kek_source_id.as_bytes());
232    push(&file.key_epoch.to_le_bytes());
233    push(file.salt.as_bytes());
234    push(file.wrapped_dek.as_bytes());
235    push(file.canary.as_bytes());
236    out
237}
238
239fn compute_mac(mac_key: &EncryptionKey, file: &KeyringFile) -> Vec<u8> {
240    let mut mac = <HmacSha256 as Mac>::new_from_slice(mac_key.as_bytes())
241        .expect("HMAC accepts any key length");
242    mac.update(&mac_input(file));
243    mac.finalize().into_bytes().to_vec()
244}
245
/// Associated data for the wrapped DEK: `db_uuid || epoch (LE) || source_id`.
///
/// Binding the wrap to the database identity, epoch and KEK source means a
/// wrapped DEK cannot be spliced into a different DB/epoch under a shared KEK.
fn wrap_aad(db_uuid: &[u8; 16], epoch: u32, source_id: &str) -> Vec<u8> {
    let epoch_le = epoch.to_le_bytes();
    [db_uuid.as_slice(), epoch_le.as_slice(), source_id.as_bytes()].concat()
}
255
/// Associated data for the canary: `db_uuid || epoch (LE)`, 20 bytes total.
fn canary_aad(db_uuid: &[u8; 16], epoch: u32) -> Vec<u8> {
    let epoch_le = epoch.to_le_bytes();
    [db_uuid.as_slice(), epoch_le.as_slice()].concat()
}
262
263fn create_encrypted(
264    db_dir: &Path,
265    path: &Path,
266    kek: &EncryptionKey,
267    source_id: &str,
268) -> Result<EncryptionState> {
269    let mut db_uuid = [0u8; 16];
270    {
271        use rand::RngCore;
272        rand::rngs::OsRng.fill_bytes(&mut db_uuid);
273    }
274    let mut salt = [0u8; 16];
275    {
276        use rand::RngCore;
277        rand::rngs::OsRng.fill_bytes(&mut salt);
278    }
279    let epoch: u32 = 0;
280
281    // Random per-DB DEK; this is what actually encrypts data.
282    let dek = EncryptionKey::new(generate_key());
283
284    // Wrap the DEK under a wrapping key derived from the KEK.
285    let wrap_key = derive_subkey(kek.as_bytes(), &salt, INFO_WRAP);
286    let wrap_engine = EncryptionEngine::from_key(&wrap_key);
287    let wrapped_dek =
288        wrap_engine.encrypt_with_aad(dek.as_bytes(), &wrap_aad(&db_uuid, epoch, source_id))?;
289
290    // Seal a canary under the DEK so a wrong key is detected at open.
291    let dek_engine = EncryptionEngine::from_key(&dek);
292    let canary = dek_engine.encrypt_with_aad(CANARY_TOKEN, &canary_aad(&db_uuid, epoch))?;
293
294    let mut file = KeyringFile {
295        format_version: KEYRING_FORMAT_VERSION,
296        encrypted: true,
297        db_uuid: hex::encode(db_uuid),
298        kek_source_id: source_id.to_string(),
299        key_epoch: epoch,
300        salt: hex::encode(salt),
301        wrapped_dek: hex::encode(&wrapped_dek),
302        canary: hex::encode(&canary),
303        mac: String::new(),
304    };
305    let mac_key = derive_subkey(kek.as_bytes(), &salt, INFO_MAC);
306    file.mac = hex::encode(compute_mac(&mac_key, &file));
307
308    write_keyring_atomic(db_dir, path, &file)?;
309
310    Ok(EncryptionState::Encrypted(ActiveEncryption {
311        engine: Arc::new(dek_engine),
312        db_uuid,
313        key_epoch: epoch,
314    }))
315}
316
317/// Verify the descriptor MAC with the KEK. A wrong KEK or any tampering of an
318/// authenticated field (e.g. `encrypted` flipped to false, epoch altered) fails
319/// here — the MAC cannot be recomputed without the KEK.
320fn verify_mac(file: &KeyringFile, kek: &EncryptionKey) -> Result<()> {
321    let salt = decode_fixed::<16>(&file.salt, "salt")?;
322    let mac_key = derive_subkey(kek.as_bytes(), &salt, INFO_MAC);
323    let expected = compute_mac(&mac_key, file);
324    let actual = hex::decode(&file.mac)
325        .map_err(|_| SochDBError::Encryption("malformed keyring mac".into()))?;
326    if !constant_time_eq(&expected, &actual) {
327        return Err(SochDBError::Encryption(
328            "keyring authentication failed: wrong encryption key or tampered \
329             keyring; refusing to open"
330                .to_string(),
331        ));
332    }
333    Ok(())
334}
335
336fn open_encrypted(file: KeyringFile, kek: &EncryptionKey) -> Result<EncryptionState> {
337    // MAC is already verified by the caller (load_or_init).
338    let salt = decode_fixed::<16>(&file.salt, "salt")?;
339    let db_uuid = decode_fixed::<16>(&file.db_uuid, "db_uuid")?;
340    let epoch = file.key_epoch;
341
342    // Unwrap the DEK.
343    let wrap_key = derive_subkey(kek.as_bytes(), &salt, INFO_WRAP);
344    let wrap_engine = EncryptionEngine::from_key(&wrap_key);
345    let wrapped_dek = hex::decode(&file.wrapped_dek)
346        .map_err(|_| SochDBError::Encryption("malformed wrapped_dek".into()))?;
347    let dek_bytes = wrap_engine
348        .decrypt_with_aad(
349            &wrapped_dek,
350            &wrap_aad(&db_uuid, epoch, &file.kek_source_id),
351        )
352        .map_err(|_| {
353            SochDBError::Encryption(
354                "failed to unwrap data key: wrong encryption key; refusing to open".into(),
355            )
356        })?;
357    if dek_bytes.len() != 32 {
358        return Err(SochDBError::Encryption(
359            "unwrapped DEK is not 32 bytes".into(),
360        ));
361    }
362    let mut dek_arr = [0u8; 32];
363    dek_arr.copy_from_slice(&dek_bytes);
364    let dek = EncryptionKey::new(dek_arr);
365
366    // Canary check: prove the DEK actually decrypts data before touching WAL.
367    let dek_engine = EncryptionEngine::from_key(&dek);
368    let canary = hex::decode(&file.canary)
369        .map_err(|_| SochDBError::Encryption("malformed canary".into()))?;
370    let token = dek_engine
371        .decrypt_with_aad(&canary, &canary_aad(&db_uuid, epoch))
372        .map_err(|_| {
373            SochDBError::Encryption(
374                "canary decryption failed: wrong encryption key; refusing to open".into(),
375            )
376        })?;
377    if token != CANARY_TOKEN {
378        return Err(SochDBError::Encryption(
379            "canary token mismatch; refusing to open".into(),
380        ));
381    }
382
383    Ok(EncryptionState::Encrypted(ActiveEncryption {
384        engine: Arc::new(dek_engine),
385        db_uuid,
386        key_epoch: epoch,
387    }))
388}
389
390fn decode_fixed<const N: usize>(hexstr: &str, what: &str) -> Result<[u8; N]> {
391    let v = hex::decode(hexstr)
392        .map_err(|_| SochDBError::Encryption(format!("malformed keyring {what}")))?;
393    if v.len() != N {
394        return Err(SochDBError::Encryption(format!(
395            "keyring {what} wrong length: {} != {N}",
396            v.len()
397        )));
398    }
399    let mut a = [0u8; N];
400    a.copy_from_slice(&v);
401    Ok(a)
402}
403
/// Compare two byte strings without stopping at the first differing byte.
///
/// A length mismatch returns `false` immediately; for equal lengths every
/// byte pair is XOR-ed and OR-accumulated, and the slices are equal only if
/// the accumulator stays zero.
fn constant_time_eq(a: &[u8], b: &[u8]) -> bool {
    a.len() == b.len()
        && a.iter()
            .zip(b)
            .fold(0u8, |acc, (lhs, rhs)| acc | (lhs ^ rhs))
            == 0
}
415
416/// Atomically persist the keyring: write temp, fsync file, rename, fsync dir.
417fn write_keyring_atomic(db_dir: &Path, path: &Path, file: &KeyringFile) -> Result<()> {
418    fs::create_dir_all(db_dir)?;
419    let json = serde_json::to_vec_pretty(file)
420        .map_err(|e| SochDBError::Encryption(format!("serialize keyring: {e}")))?;
421    let tmp = path.with_extension("json.tmp");
422    {
423        let mut f = fs::File::create(&tmp)?;
424        f.write_all(&json)?;
425        f.sync_all()?;
426    }
427    fs::rename(&tmp, path)?;
428    // fsync the directory so the rename is durable.
429    if let Ok(dir) = fs::File::open(db_dir) {
430        let _ = dir.sync_all();
431    }
432    Ok(())
433}
434
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Deterministic 32-byte KEK made of `seed` repeated.
    fn kek(seed: u8) -> EncryptionKey {
        EncryptionKey::new([seed; 32])
    }

    /// Load the on-disk keyring, apply `edit`, and write it back WITHOUT
    /// recomputing the MAC — i.e. what an attacker without the KEK can do.
    fn tamper(db_dir: &Path, edit: impl FnOnce(&mut KeyringFile)) {
        let path = db_dir.join(KEYRING_FILE_NAME);
        let mut file: KeyringFile = serde_json::from_slice(&fs::read(&path).unwrap()).unwrap();
        edit(&mut file);
        fs::write(&path, serde_json::to_vec_pretty(&file).unwrap()).unwrap();
    }

    /// The open must fail with a hard encryption error, NOT a silent
    /// plaintext/empty open.
    fn assert_fails_closed(outcome: Result<EncryptionState>) {
        let err = outcome.unwrap_err();
        assert!(matches!(err, SochDBError::Encryption(_)));
    }

    #[test]
    fn plaintext_when_no_key_and_no_file() {
        let dir = tempdir().unwrap();
        let state = load_or_init(dir.path(), None, "test", true).unwrap();
        assert!(!state.is_encrypted());
        assert!(!dir.path().join(KEYRING_FILE_NAME).exists());
    }

    #[test]
    fn create_then_reopen_roundtrips_dek() {
        let dir = tempdir().unwrap();
        let created = load_or_init(dir.path(), Some(&kek(7)), "env", true).unwrap();
        assert!(created.is_encrypted());
        let original_uuid = created.db_uuid();
        // Engine actually encrypts.
        let ciphertext = created.engine().encrypt(b"secret").unwrap();
        assert_ne!(ciphertext, b"secret");

        // Reopen with the SAME kek -> same DEK -> can decrypt the ciphertext.
        let reopened = load_or_init(dir.path(), Some(&kek(7)), "env", false).unwrap();
        assert!(reopened.is_encrypted());
        assert_eq!(reopened.db_uuid(), original_uuid);
        assert_eq!(reopened.engine().decrypt(&ciphertext).unwrap(), b"secret");
    }

    #[test]
    fn reopen_with_wrong_key_fails_closed() {
        let dir = tempdir().unwrap();
        load_or_init(dir.path(), Some(&kek(1)), "env", true).unwrap();
        assert_fails_closed(load_or_init(dir.path(), Some(&kek(2)), "env", false));
    }

    #[test]
    fn reopen_encrypted_without_key_fails_closed() {
        let dir = tempdir().unwrap();
        load_or_init(dir.path(), Some(&kek(1)), "env", true).unwrap();
        assert_fails_closed(load_or_init(dir.path(), None, "env", true));
    }

    #[test]
    fn forging_encrypted_false_is_rejected_by_mac() {
        let dir = tempdir().unwrap();
        load_or_init(dir.path(), Some(&kek(9)), "env", true).unwrap();

        // Attacker flips encrypted -> false to force a plaintext downgrade.
        tamper(dir.path(), |file| file.encrypted = false);

        // MAC is verified BEFORE the encrypted flag is honored, and the MAC
        // covers `encrypted`, so the forgery is rejected fail-closed.
        assert_fails_closed(load_or_init(dir.path(), Some(&kek(9)), "env", false));
    }

    #[test]
    fn keyring_present_but_no_key_fails_even_if_flag_says_plaintext() {
        let dir = tempdir().unwrap();
        load_or_init(dir.path(), Some(&kek(4)), "env", true).unwrap();
        tamper(dir.path(), |file| file.encrypted = false); // forged

        // No key supplied + keyring present ⇒ refuse (presence implies encryption).
        assert_fails_closed(load_or_init(dir.path(), None, "env", false));
    }

    #[test]
    fn tampering_authenticated_field_is_rejected() {
        let dir = tempdir().unwrap();
        load_or_init(dir.path(), Some(&kek(5)), "env", true).unwrap();

        // Tamper an authenticated field while keeping encrypted=true.
        tamper(dir.path(), |file| file.key_epoch = 999);

        assert_fails_closed(load_or_init(dir.path(), Some(&kek(5)), "env", false));
    }

    #[test]
    fn key_provided_for_existing_plaintext_db_without_create_fails() {
        let dir = tempdir().unwrap();
        // Simulate an existing plaintext DB: a wal.log but no keyring.
        fs::write(dir.path().join("wal.log"), b"legacy").unwrap();
        assert_fails_closed(load_or_init(dir.path(), Some(&kek(3)), "env", false));
    }
}