Skip to main content

umbral_core/orm/
masked.rs

1//! `Masked<T>` — an encrypt-at-rest field type for PII / secrets.
2//!
3//! ## What this is
4//!
//! A `Masked<String>` is a `String`-shaped field that is stored
//! **encrypted** in the database (base64 ciphertext in a plain `TEXT`
//! column) and kept out of everything it would otherwise leak into — its
//! `Debug` and `Display` output are redacted to `"••••••"`, and its serde
//! output is the sealed ciphertext rather than the plaintext. A value
//! loaded from the database is recoverable only through an explicit
//! [`Masked::reveal`] call, which needs the private key.
11//!
12//! The point is GDPR-style field encryption: mark a column
13//! `phone: Masked<String>` and a stolen DB dump leaks ciphertext, not
14//! phone numbers. `umbral-oauth` uses it for provider access/refresh
15//! tokens.
16//!
17//! ## Crypto: anonymous sealed boxes (public-key encryption)
18//!
19//! Encryption uses an X25519 + XSalsa20-Poly1305 box (the RustCrypto
20//! [`crypto_box`] primitive) in an **anonymous-sender** construction: a
21//! fresh ephemeral keypair is generated per value, the box is sealed to
22//! the configured *public* key, and the ephemeral public key + nonce are
23//! prepended to the ciphertext. Anyone holding the public key can
24//! encrypt; only the holder of the *private* key can decrypt. That
25//! asymmetry buys two things:
26//!
27//! - a write-only tier can store PII it can never read, and
28//! - **crypto-shredding**: destroying the private key renders every
29//!   masked column permanently unrecoverable — a fast bulk erasure for
30//!   "right to be forgotten".
31//!
32//! ## Keys
33//!
34//! The keyring is resolved once (ambient `OnceLock`, like the DB pool)
35//! from `UMBRAL_MASK_PUBLIC_KEY` and the optional `UMBRAL_MASK_PRIVATE_KEY`
36//! (both base64), or injected explicitly via [`set_mask_keyring`] (tests,
37//! or an app that loads keys from a vault). Generate a keypair with
38//! `cargo run -- maskkeygen`. Encryption needs only the public key;
39//! [`Masked::reveal`] needs the private key and returns
40//! [`MaskError::NoPrivateKey`] when it's absent.
41
42use std::sync::OnceLock;
43
44use base64::Engine as _;
45use crypto_box::{
46    SalsaBox,
47    aead::{Aead, AeadCore},
48};
49use rand_core::OsRng;
50use serde::{Deserialize, Deserializer, Serialize, Serializer};
51
52const REDACTED: &str = "••••••";
53const B64: base64::engine::general_purpose::GeneralPurpose =
54    base64::engine::general_purpose::STANDARD;
55
/// Failure modes for sealing or revealing a [`Masked`] value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MaskError {
    /// Nothing is configured: `set_mask_keyring` was never called and
    /// `UMBRAL_MASK_PUBLIC_KEY` is not set, so values cannot be encrypted.
    NoKeyring,
    /// Only the public key is available. Values can be stored but not
    /// revealed until `UMBRAL_MASK_PRIVATE_KEY` is provided.
    NoPrivateKey,
    /// A configured key failed to parse (not valid base64, or not 32
    /// bytes). The payload describes what was wrong.
    BadKey(String),
    /// The stored ciphertext is structurally invalid (too short /
    /// truncated).
    Malformed,
    /// Authenticated decryption was rejected (wrong key, or the data was
    /// tampered with).
    Decrypt,
}

impl std::fmt::Display for MaskError {
    /// Write the human-readable message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only `BadKey` carries a payload; every other variant maps to a
        // fixed message that is written in one place below.
        let message = match self {
            Self::BadKey(why) => return write!(f, "invalid mask key: {why}"),
            Self::NoKeyring => {
                "no mask keyring configured (set UMBRAL_MASK_PUBLIC_KEY or call set_mask_keyring)"
            }
            Self::NoPrivateKey => {
                "mask keyring has no private key; cannot reveal (set UMBRAL_MASK_PRIVATE_KEY)"
            }
            Self::Malformed => "masked ciphertext is malformed",
            Self::Decrypt => "masked ciphertext failed to decrypt",
        };
        f.write_str(message)
    }
}

impl std::error::Error for MaskError {}
90
91// =========================================================================
92// Keyring
93// =========================================================================
94
95/// A mask keyring: a recipient X25519 public key (always present) and an
96/// optional secret key (present only on tiers that need to decrypt).
97#[derive(Clone)]
98pub struct MaskKeyring {
99    public: crypto_box::PublicKey,
100    secret: Option<crypto_box::SecretKey>,
101}
102
103impl MaskKeyring {
104    /// Build a keyring from base64-encoded keys. The public key is
105    /// required; the secret key is optional (a write-only tier omits it).
106    pub fn from_base64(public_b64: &str, secret_b64: Option<&str>) -> Result<Self, MaskError> {
107        let public = decode_key(public_b64)?;
108        let public = crypto_box::PublicKey::from(public);
109        let secret = match secret_b64 {
110            Some(s) if !s.is_empty() => Some(crypto_box::SecretKey::from(decode_key(s)?)),
111            _ => None,
112        };
113        Ok(Self { public, secret })
114    }
115
116    /// Read `UMBRAL_MASK_PUBLIC_KEY` (+ optional `UMBRAL_MASK_PRIVATE_KEY`)
117    /// from the environment. Returns `Err(NoKeyring)` if the public key
118    /// is unset.
119    pub fn from_env() -> Result<Self, MaskError> {
120        let public = std::env::var("UMBRAL_MASK_PUBLIC_KEY").map_err(|_| MaskError::NoKeyring)?;
121        let secret = std::env::var("UMBRAL_MASK_PRIVATE_KEY").ok();
122        Self::from_base64(&public, secret.as_deref())
123    }
124
125    /// Generate a fresh keypair, returned as `(public_b64, secret_b64)`
126    /// for printing into env vars. Used by the `maskkeygen` command.
127    pub fn generate() -> (String, String) {
128        let secret = crypto_box::SecretKey::generate(&mut OsRng);
129        let public = secret.public_key();
130        (B64.encode(public.as_bytes()), B64.encode(secret.to_bytes()))
131    }
132
133    /// Seal `plaintext` to this keyring's public key, returning base64
134    /// ciphertext. Needs only the public key.
135    pub fn seal(&self, plaintext: &[u8]) -> String {
136        // Anonymous-sender sealed box: ephemeral keypair per message, box
137        // sealed to the recipient public key, ephemeral public key +
138        // nonce prepended so the recipient can reconstruct the box.
139        let eph_secret = crypto_box::SecretKey::generate(&mut OsRng);
140        let eph_public = eph_secret.public_key();
141        let salsa = SalsaBox::new(&self.public, &eph_secret);
142        let nonce = SalsaBox::generate_nonce(&mut OsRng);
143        let ciphertext = salsa
144            .encrypt(&nonce, plaintext)
145            .expect("XSalsa20-Poly1305 encryption is infallible for in-memory plaintext");
146        let mut out = Vec::with_capacity(32 + nonce.len() + ciphertext.len());
147        out.extend_from_slice(eph_public.as_bytes());
148        out.extend_from_slice(nonce.as_slice());
149        out.extend_from_slice(&ciphertext);
150        B64.encode(out)
151    }
152
153    /// Open base64 ciphertext produced by [`Self::seal`]. Needs the
154    /// private key.
155    pub fn open(&self, b64_ciphertext: &str) -> Result<String, MaskError> {
156        let secret = self.secret.as_ref().ok_or(MaskError::NoPrivateKey)?;
157        let sealed = B64
158            .decode(b64_ciphertext)
159            .map_err(|_| MaskError::Malformed)?;
160        if sealed.len() < 32 + 24 {
161            return Err(MaskError::Malformed);
162        }
163        let eph_public: [u8; 32] = sealed[..32].try_into().map_err(|_| MaskError::Malformed)?;
164        let eph_public = crypto_box::PublicKey::from(eph_public);
165        let nonce = crypto_box::Nonce::from_slice(&sealed[32..56]);
166        let ciphertext = &sealed[56..];
167        let salsa = SalsaBox::new(&eph_public, secret);
168        let plaintext = salsa
169            .decrypt(nonce, ciphertext)
170            .map_err(|_| MaskError::Decrypt)?;
171        String::from_utf8(plaintext).map_err(|_| MaskError::Decrypt)
172    }
173
174    /// Whether this keyring can decrypt (has a private key).
175    pub fn can_reveal(&self) -> bool {
176        self.secret.is_some()
177    }
178}
179
180fn decode_key(b64: &str) -> Result<[u8; 32], MaskError> {
181    let bytes = B64
182        .decode(b64.trim())
183        .map_err(|e| MaskError::BadKey(e.to_string()))?;
184    bytes
185        .try_into()
186        .map_err(|_| MaskError::BadKey("key is not 32 bytes".to_string()))
187}
188
189/// Ambient keyring state. Stores `Ok(Some(keyring))` when correctly
190/// configured, `Ok(None)` when masking is simply not configured (the key
191/// env-var is absent), and `Err(MaskError::BadKey(...))` when a key env-var
192/// IS present but is malformed (bad base64 or wrong byte length). The third
193/// state is the bug-fix: a malformed key must never silently resolve to
194/// `None` and allow plaintext writes.
195static KEYRING: OnceLock<Result<Option<MaskKeyring>, MaskError>> = OnceLock::new();
196
197/// Inject the ambient mask keyring (tests, or an app loading keys from a
198/// vault). Returns `false` if the keyring was already resolved. Mirrors
199/// `crate::storage::set_storage`'s set-once discipline.
200pub fn set_mask_keyring(keyring: MaskKeyring) -> bool {
201    KEYRING.set(Ok(Some(keyring))).is_ok()
202}
203
204/// The ambient keyring, lazily resolved from the environment on first access
205/// if not explicitly set.
206///
207/// Returns:
208/// - `Ok(Some(kr))` — correctly configured, use `kr` to seal/open.
209/// - `Ok(None)` — masking not configured (env-var absent); callers return
210///   `Err(MaskError::NoKeyring)`.
211/// - `Err(e)` — key IS present but malformed; callers propagate `e` so the
212///   caller sees `BadKey(...)` rather than the misleading `NoKeyring`.
213fn keyring() -> Result<Option<&'static MaskKeyring>, &'static MaskError> {
214    KEYRING
215        .get_or_init(|| match MaskKeyring::from_env() {
216            Ok(k) => Ok(Some(k)),
217            // No public key in the env → masking is simply not configured.
218            // That's an expected, silent state.
219            Err(MaskError::NoKeyring) => Ok(None),
220            // A key IS present but couldn't be parsed. Store the error so
221            // every subsequent seal/open returns BadKey, not the misleading
222            // NoKeyring. Also log once so operators see it in the startup
223            // logs without having to trigger an actual write.
224            Err(e) => {
225                tracing::error!(
226                    "UMBRAL_MASK_PUBLIC_KEY/UMBRAL_MASK_PRIVATE_KEY is set but could not be \
227                     parsed ({e}); all Masked<T> seal/reveal calls will fail with BadKey. \
228                     Fix the key or unset the variable."
229                );
230                Err(e)
231            }
232        })
233        .as_ref()
234        .map(|opt| opt.as_ref())
235        .map_err(|e| e)
236}
237
238/// Seal plaintext with the ambient keyring.
239fn ambient_seal(plaintext: &str) -> Result<String, MaskError> {
240    match keyring() {
241        Ok(Some(k)) => Ok(k.seal(plaintext.as_bytes())),
242        Ok(None) => Err(MaskError::NoKeyring),
243        Err(e) => Err(e.clone()),
244    }
245}
246
247/// Open ciphertext with the ambient keyring.
248fn ambient_open(ciphertext: &str) -> Result<String, MaskError> {
249    match keyring() {
250        Ok(Some(k)) => k.open(ciphertext),
251        Ok(None) => Err(MaskError::NoKeyring),
252        Err(e) => Err(e.clone()),
253    }
254}
255
256// =========================================================================
257// Masked<T>
258// =========================================================================
259
260/// An encrypt-at-rest string field. Plaintext when freshly constructed,
261/// ciphertext once loaded from the DB. Redacted in `Debug` / `Display` /
262/// serde output; reveal the plaintext with [`Masked::reveal`].
263///
264/// The type parameter is currently fixed to `String` in practice
265/// (`Masked<String>`); it exists so the API can widen to other revealed
266/// types later without a breaking rename.
267#[derive(Clone)]
268pub struct Masked<T = String> {
269    inner: MaskInner,
270    _marker: std::marker::PhantomData<T>,
271}
272
/// The two representations a [`Masked`] value can hold.
#[derive(Clone)]
enum MaskInner {
    /// Plaintext held in memory that has not been encrypted yet; it is
    /// sealed on the write path.
    Plain(String),
    /// Base64 ciphertext exactly as stored in the DB; only decrypted when
    /// `reveal()` is called.
    Sealed(String),
}
282
283impl<T> Masked<T> {
284    /// Construct from plaintext. The value is sealed when it's written to
285    /// the database, not now.
286    pub fn new(plaintext: impl Into<String>) -> Self {
287        Self {
288            inner: MaskInner::Plain(plaintext.into()),
289            _marker: std::marker::PhantomData,
290        }
291    }
292
293    /// Reveal the plaintext. For an in-memory value this is the value
294    /// itself; for a value loaded from the DB this decrypts it (needs the
295    /// private key).
296    pub fn reveal(&self) -> Result<String, MaskError> {
297        match &self.inner {
298            MaskInner::Plain(p) => Ok(p.clone()),
299            MaskInner::Sealed(c) => ambient_open(c),
300        }
301    }
302
303    /// Whether the ambient keyring can reveal this value (has a private
304    /// key). An in-memory plaintext is always revealable.
305    pub fn is_revealable(&self) -> bool {
306        match &self.inner {
307            MaskInner::Plain(_) => true,
308            MaskInner::Sealed(_) => keyring()
309                .ok()
310                .and_then(|opt| opt)
311                .map(MaskKeyring::can_reveal)
312                .unwrap_or(false),
313        }
314    }
315
316    /// The stored representation — base64 ciphertext for a sealed value,
317    /// or the freshly-sealed ciphertext for an in-memory value. This is
318    /// what the sqlx `Encode` path binds.
319    fn to_stored(&self) -> Result<String, MaskError> {
320        match &self.inner {
321            MaskInner::Plain(p) => ambient_seal(p),
322            MaskInner::Sealed(c) => Ok(c.clone()),
323        }
324    }
325}
326
327impl<T> Default for Masked<T> {
328    /// An empty masked value (empty plaintext). Encodes to a sealed empty
329    /// string on write.
330    fn default() -> Self {
331        Masked::new(String::new())
332    }
333}
334
335impl<T> std::fmt::Debug for Masked<T> {
336    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
337        f.write_str("Masked(••••••)")
338    }
339}
340
341impl<T> std::fmt::Display for Masked<T> {
342    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
343        f.write_str(REDACTED)
344    }
345}
346
347impl<T> From<String> for Masked<T> {
348    fn from(plaintext: String) -> Self {
349        Masked::new(plaintext)
350    }
351}
352
353impl<T> From<&str> for Masked<T> {
354    fn from(plaintext: &str) -> Self {
355        Masked::new(plaintext)
356    }
357}
358
// ---- serde: emit ciphertext on the way out, treat input as plaintext ----
360
361impl<T> Serialize for Masked<T> {
362    /// Serialize as the **stored ciphertext** (sealing in-memory plaintext
363    /// first). This is load-bearing: the ORM write path binds values via
364    /// `serde_json::to_value(instance)`, so the serialized form *is* what
365    /// lands in the column — it must be ciphertext, not plaintext and not
366    /// the redaction marker. The plaintext therefore never leaves the
367    /// process through serde; a REST response carries an opaque encrypted
368    /// blob (hide the field with the REST serializer's `.hide(...)` for a
369    /// clean response). `Debug` / `Display` stay redacted for logs.
370    ///
371    /// Fails the serialize if no keyring is configured — that surfaces a
372    /// missing `UMBRAL_MASK_PUBLIC_KEY` loudly at write time instead of
373    /// silently storing plaintext.
374    fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
375        let stored = self.to_stored().map_err(serde::ser::Error::custom)?;
376        s.serialize_str(&stored)
377    }
378}
379
380impl<'de, T> Deserialize<'de> for Masked<T> {
381    /// A JSON / form string is read as new plaintext (to be sealed on
382    /// write). The redaction marker round-trips to an empty value so a
383    /// REST client echoing a redacted field back doesn't overwrite the
384    /// stored ciphertext with `"••••••"`.
385    fn deserialize<D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
386        let s = String::deserialize(d)?;
387        if s == REDACTED {
388            // Echoed-back redaction: treat as "no change" → empty plain.
389            Ok(Masked::new(String::new()))
390        } else {
391            Ok(Masked::new(s))
392        }
393    }
394}
395
396// ---- sqlx: encrypt on encode, store ciphertext on decode ----
397
398macro_rules! impl_masked_sqlx {
399    ($db:ty, $valueref:ty, $argbuf:ty) => {
400        impl<T> sqlx::Type<$db> for Masked<T> {
401            fn type_info() -> <$db as sqlx::Database>::TypeInfo {
402                <String as sqlx::Type<$db>>::type_info()
403            }
404            fn compatible(ty: &<$db as sqlx::Database>::TypeInfo) -> bool {
405                <String as sqlx::Type<$db>>::compatible(ty)
406            }
407        }
408
409        impl<'r, T> sqlx::Decode<'r, $db> for Masked<T> {
410            fn decode(value: $valueref) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
411                let ciphertext = <String as sqlx::Decode<$db>>::decode(value)?;
412                Ok(Masked {
413                    inner: MaskInner::Sealed(ciphertext),
414                    _marker: std::marker::PhantomData,
415                })
416            }
417        }
418
419        impl<'q, T> sqlx::Encode<'q, $db> for Masked<T> {
420            fn encode_by_ref(
421                &self,
422                buf: &mut $argbuf,
423            ) -> Result<sqlx::encode::IsNull, Box<dyn std::error::Error + Send + Sync>> {
424                let stored = self.to_stored()?;
425                <String as sqlx::Encode<'q, $db>>::encode_by_ref(&stored, buf)
426            }
427        }
428    };
429}
430
431impl_masked_sqlx!(
432    sqlx::Sqlite,
433    sqlx::sqlite::SqliteValueRef<'r>,
434    <sqlx::Sqlite as sqlx::Database>::ArgumentBuffer<'q>
435);
436impl_masked_sqlx!(
437    sqlx::Postgres,
438    sqlx::postgres::PgValueRef<'r>,
439    <sqlx::Postgres as sqlx::Database>::ArgumentBuffer<'q>
440);
441
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly generated keyring holding both halves of the keypair.
    fn test_keyring() -> MaskKeyring {
        let (public_b64, secret_b64) = MaskKeyring::generate();
        MaskKeyring::from_base64(&public_b64, Some(&secret_b64)).unwrap()
    }

    #[test]
    fn seal_open_round_trips() {
        let phone = "+254712345678";
        let ring = test_keyring();
        let sealed = ring.seal(phone.as_bytes());
        assert_ne!(sealed, phone, "stored form is not plaintext");
        assert_eq!(ring.open(&sealed).unwrap(), phone);
    }

    #[test]
    fn each_seal_is_distinct_ciphertext() {
        // Every call draws a new ephemeral keypair and nonce, so sealing
        // the same plaintext twice gives different output that still
        // opens to the same value.
        let ring = test_keyring();
        let first = ring.seal(b"secret");
        let second = ring.seal(b"secret");
        assert_ne!(
            first, second,
            "ephemeral keypair makes ciphertext non-deterministic"
        );
        for sealed in [&first, &second] {
            assert_eq!(ring.open(sealed).unwrap(), "secret");
        }
    }

    #[test]
    fn public_key_only_cannot_open() {
        let (public_b64, secret_b64) = MaskKeyring::generate();
        let write_only = MaskKeyring::from_base64(&public_b64, None).unwrap();
        let sealed = write_only.seal(b"pii");
        assert_eq!(write_only.open(&sealed), Err(MaskError::NoPrivateKey));
        // Adding the private key to the same public key makes it readable.
        let full = MaskKeyring::from_base64(&public_b64, Some(&secret_b64)).unwrap();
        assert_eq!(full.open(&sealed).unwrap(), "pii");
    }

    #[test]
    fn wrong_key_fails_to_decrypt() {
        let sealer = test_keyring();
        let stranger = test_keyring();
        let sealed = sealer.seal(b"private");
        assert_eq!(stranger.open(&sealed), Err(MaskError::Decrypt));
    }

    #[test]
    fn masked_redacts_in_debug_and_display() {
        // Neither formatter touches the keyring, so this holds with no
        // keys configured at all.
        let m: Masked = Masked::new("0712-secret");
        let shown = m.to_string();
        let debugged = format!("{m:?}");
        assert_eq!(shown, REDACTED, "Display is redacted");
        assert!(debugged.contains("••••••"), "Debug is redacted");
    }

    #[test]
    fn serialize_emits_ciphertext_not_plaintext() {
        // serde output is what the ORM write path binds (through
        // `serde_json::to_value`), so it has to be sealed ciphertext —
        // neither the plaintext nor the redaction marker. Sealing goes
        // through the ambient keyring, so one is installed first.
        let (public_b64, secret_b64) = MaskKeyring::generate();
        set_mask_keyring(MaskKeyring::from_base64(&public_b64, Some(&secret_b64)).unwrap());
        let m: Masked = Masked::new("0712-secret");
        let json = serde_json::to_string(&m).unwrap();
        assert!(
            !json.contains("0712-secret"),
            "serialized form must not be the plaintext"
        );
        let quoted_marker = format!("\"{REDACTED}\"");
        assert_ne!(
            json, quoted_marker,
            "serialized form is ciphertext, not the redaction marker"
        );
    }

    #[test]
    fn in_memory_plaintext_reveals_without_keyring() {
        // A value built with `new` is still plaintext in memory, so
        // `reveal` hands it back without decrypting anything.
        let m: Masked = Masked::new("hello");
        assert_eq!(m.reveal().unwrap(), "hello");
    }
}