Skip to main content

keyhog_core/
credential.rs

1//! Opaque, zeroize-on-drop credential bytes.
2//!
3//! Debt bucket (`#![allow(missing_docs)]` below): 7 items predating the crate
4//! floor raising `missing_docs` to `warn`. Remove once each carries a doc.
5//!
6//! Replaces the previous `Arc<str>` credential field with a type that:
7//!
8//! 1. Zeroes its bytes on drop (`zeroize` crate). Heap pages keyhog freed
9//!    while a scan was in flight no longer leak credentials to the next
10//!    allocator request, swap, or post-mortem core dump.
11//! 2. Refuses `Debug` / `Display` printing - every leak path through `{:?}`
12//!    or `{}` becomes `<redacted N bytes>` instead of the bytes themselves.
//!    To get the bytes you must call `expose_secret()` or `expose_str()`
//!    explicitly, so grepping the codebase for `expose_` audits every
//!    credential touch site.
15//! 3. Is `Clone` and serializable via `serde` (uses the `expose_secret()`
16//!    bytes for `Serialize`, decodes back to a fresh `Credential` for
17//!    `Deserialize`). The serialization channel is the responsibility of
//!    the caller - find emitters that go to disk/JSON and either redact
//!    them or wrap the entire output in an EnvSeal seal.
20//!
21//! When EnvSeal embeds keyhog, this type is the only place credential
22//! bytes ever appear in process memory; an mlock + memfd backing can be
23//! added behind the `lockdown` feature gate without touching call sites.
24
25#![allow(missing_docs)]
26
27use serde::{Deserialize, Deserializer, Serialize, Serializer};
28use std::cmp::Ordering;
29use std::hash::{Hash, Hasher};
30use std::sync::Arc;
31use zeroize::Zeroizing;
32
/// Opaque credential bytes.
///
/// The secret lives in a single `Arc<Zeroizing<Box<[u8]>>>`. Cloning a
/// `Credential` only bumps the `Arc` refcount, so the engine can intern
/// identical credentials without copying the bytes. When the last clone is
/// dropped, `Zeroizing<Box<[u8]>>` overwrites the heap allocation before
/// `Box::drop` returns it to the allocator.
///
/// The field is private: the bytes are reachable only through the methods
/// and trait impls defined on this type.
#[derive(Clone)]
pub struct Credential {
    // Shared, immutable secret bytes; wiped when the final `Arc` reference drops.
    inner: Arc<Zeroizing<Box<[u8]>>>,
}
42
43impl Credential {
44    /// Build a `Credential` from raw bytes. The bytes are copied into a
45    /// fresh `Zeroizing<Box<[u8]>>` and the input slice is unchanged
46    /// (caller is responsible for zeroizing whatever it came from).
47    #[must_use]
48    pub fn from_bytes(bytes: &[u8]) -> Self {
49        Self {
50            inner: Arc::new(Zeroizing::new(bytes.to_vec().into_boxed_slice())),
51        }
52    }
53
54    /// Build a `Credential` from a borrowed `str`. Same semantics as
55    /// `from_bytes` - bytes are copied into the zeroizing allocation.
56    /// Named `from_text` (not `from_str`) to avoid the
57    /// `clippy::should_implement_trait` lint and to keep the API
58    /// distinct from `core::str::FromStr` (which has different error
59    /// semantics - we never fail to construct a Credential).
60    #[must_use]
61    pub fn from_text(s: &str) -> Self {
62        Self::from_bytes(s.as_bytes())
63    }
64
65    /// Length in bytes.
66    #[must_use]
67    pub fn len(&self) -> usize {
68        self.inner.len()
69    }
70
71    #[must_use]
72    pub fn is_empty(&self) -> bool {
73        self.inner.is_empty()
74    }
75
76    /// Expose the underlying bytes. Every call site MUST be auditable -
77    /// `git grep expose_secret` should surface every place credentials
78    /// leave the opaque wrapper. Treat each one as a security review item.
79    ///
80    /// Returns a `&[u8]` rather than `&str` because credentials may be
81    /// non-UTF-8 (binary-encoded keys, raw private-key bytes, etc).
82    #[must_use]
83    pub fn expose_secret(&self) -> &[u8] {
84        &self.inner
85    }
86
87    /// Expose the credential as a `&str` if it's valid UTF-8, otherwise
88    /// `None`. Most production credentials ARE valid UTF-8 (provider keys,
89    /// tokens, base64) so this is the common path.
90    #[must_use]
91    pub fn expose_str(&self) -> Option<&str> {
92        std::str::from_utf8(&self.inner).ok()
93    }
94}
95
96impl From<&str> for Credential {
97    fn from(s: &str) -> Self {
98        Self::from_text(s)
99    }
100}
101
102impl From<String> for Credential {
103    fn from(s: String) -> Self {
104        // The input `String`'s buffer is dropped without zeroizing - the
105        // caller should ideally pass `&str` so the bytes never sit in a
106        // non-zeroizing `String`. We do the right thing for our own
107        // allocation either way.
108        Self::from_bytes(s.as_bytes())
109    }
110}
111
112impl From<&[u8]> for Credential {
113    fn from(b: &[u8]) -> Self {
114        Self::from_bytes(b)
115    }
116}
117
118impl From<Vec<u8>> for Credential {
119    fn from(v: Vec<u8>) -> Self {
120        Self::from_bytes(&v)
121    }
122}
123
124impl PartialEq for Credential {
125    fn eq(&self, other: &Self) -> bool {
126        // Constant-time equality. Credentials are compared during dedup
127        // and inflight de-duplication; using `==` on naked bytes leaks
128        // information through CPU branch timing in pathological cases.
129        // The cost is one extra XOR per byte vs `==`, negligible at the
130        // sizes of credentials (<1 KiB typical).
131        if self.inner.len() != other.inner.len() {
132            return false;
133        }
134        let mut diff: u8 = 0;
135        for (a, b) in self.inner.iter().zip(other.inner.iter()) {
136            diff |= a ^ b;
137        }
138        diff == 0
139    }
140}
141
// Marker impl: `PartialEq::eq` compares lengths and bytes only, so equality
// is reflexive, symmetric and transitive as `Eq` requires.
impl Eq for Credential {}
143
144impl PartialOrd for Credential {
145    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
146        Some(self.cmp(other))
147    }
148}
149
150impl Ord for Credential {
151    fn cmp(&self, other: &Self) -> Ordering {
152        self.inner
153            .as_ref()
154            .as_ref()
155            .cmp(other.inner.as_ref().as_ref())
156    }
157}
158
159impl Hash for Credential {
160    fn hash<H: Hasher>(&self, state: &mut H) {
161        self.inner.as_ref().as_ref().hash(state);
162    }
163}
164
165impl std::fmt::Debug for Credential {
166    /// Refuse to format the bytes. This is a compile-time leak guard -
167    /// every place that did `eprintln!("{:?}", cred)` or `tracing::error!(?cred)`
168    /// now prints `Credential(<redacted N bytes>)` instead of the secret.
169    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170        write!(f, "Credential(<redacted {} bytes>)", self.inner.len())
171    }
172}
173
174impl std::fmt::Display for Credential {
175    /// Same redaction as `Debug` - `format!("{}", cred)` returns the
176    /// redacted form, never the bytes.
177    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
178        write!(f, "<redacted {} bytes>", self.inner.len())
179    }
180}
181
182impl Serialize for Credential {
183    /// Serialize as a tagged JSON object so the encoding is unambiguous.
184    /// kimi-wave2 §Critical: the previous `"b64:<base64>"` string-prefix
185    /// scheme round-tripped a UTF-8 credential like `"b64:SGVsbG8="`
186    /// (a literal user-typed value) through the deserializer as if it
187    /// were base64-encoded bytes, silently corrupting it. The tagged
188    /// variant `{"text":"…"}` / `{"b64":"…"}` cannot be confused with
189    /// either form.
190    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
191        use serde::ser::SerializeMap;
192        let mut m = serializer.serialize_map(Some(1))?;
193        match self.expose_str() {
194            Some(s) => m.serialize_entry("text", s)?,
195            None => m.serialize_entry("b64", &base64_encode(&self.inner))?,
196        }
197        m.end()
198    }
199}
200
201impl<'de> Deserialize<'de> for Credential {
202    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
203        // Accept the new tagged form (preferred) OR the legacy
204        // `b64:<base64>` / plain string forms (so on-disk artifacts
205        // from earlier versions still load). The legacy ambiguity is
206        // exactly what kimi-wave2 §Critical flagged; new writers must
207        // use the tagged form.
208        #[derive(Deserialize)]
209        #[serde(untagged)]
210        enum Wire {
211            Tagged {
212                #[serde(default)]
213                text: Option<String>,
214                #[serde(default)]
215                b64: Option<String>,
216            },
217            Legacy(String),
218        }
219        match Wire::deserialize(deserializer)? {
220            Wire::Tagged {
221                text: Some(t),
222                b64: None,
223            } => Ok(Credential::from_text(&t)),
224            Wire::Tagged {
225                text: None,
226                b64: Some(b),
227            } => {
228                let bytes = crate::encoding::decode_standard_base64(&b)
229                    .map_err(serde::de::Error::custom)?;
230                Ok(Credential::from_bytes(&bytes))
231            }
232            Wire::Tagged { .. } => Err(serde::de::Error::custom(
233                "Credential must specify exactly one of `text` or `b64`",
234            )),
235            Wire::Legacy(s) => {
236                if let Some(rest) = s.strip_prefix("b64:") {
237                    let bytes = crate::encoding::decode_standard_base64(rest)
238                        .map_err(serde::de::Error::custom)?;
239                    Ok(Credential::from_bytes(&bytes))
240                } else {
241                    Ok(Credential::from_text(&s))
242                }
243            }
244        }
245    }
246}
247
/// Encode `input` as standard base64 (alphabet `A-Z a-z 0-9 + /`) with `=`
/// padding. Hand-rolled so this module needs no `base64` crate dependency.
fn base64_encode(input: &[u8]) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const PAD: char = '=';

    // Pick the 6-bit digit that starts `shift` bits from the bottom of a
    // 24-bit group and map it to its alphabet character.
    let sextet = |group: u32, shift: u32| ALPHABET[((group >> shift) & 0x3F) as usize] as char;

    let mut encoded = String::with_capacity(input.len().div_ceil(3) * 4);
    for chunk in input.chunks(3) {
        // Pack up to three bytes big-endian into the low 24 bits; missing
        // trailing bytes stay zero.
        let group = chunk
            .iter()
            .enumerate()
            .fold(0u32, |acc, (i, &byte)| acc | (u32::from(byte) << (16 - 8 * i)));

        encoded.push(sextet(group, 18));
        encoded.push(sextet(group, 12));
        encoded.push(if chunk.len() > 1 { sextet(group, 6) } else { PAD });
        encoded.push(if chunk.len() > 2 { sextet(group, 0) } else { PAD });
    }
    encoded
}
271
/// A heap-allocated string that is zeroized on drop.
///
/// The text is stored as `Arc<Zeroizing<String>>`: clones share one
/// allocation (refcount bump), and the buffer is wiped when the last clone
/// is dropped. The derived `Default` yields an empty string.
#[derive(Clone, Default)]
pub struct SensitiveString {
    // Shared, immutable text; wiped when the final `Arc` reference drops.
    inner: Arc<Zeroizing<String>>,
}
277
278impl SensitiveString {
279    pub fn new(s: String) -> Self {
280        Self {
281            inner: Arc::new(Zeroizing::new(s)),
282        }
283    }
284
285    pub fn join(parts: &[SensitiveString], sep: &str) -> Self {
286        let mut s = String::new();
287        for (i, p) in parts.iter().enumerate() {
288            if i > 0 {
289                s.push_str(sep);
290            }
291            s.push_str(p.as_str());
292        }
293        Self::new(s)
294    }
295
296    pub fn as_str(&self) -> &str {
297        self.inner.as_str()
298    }
299
300    pub fn as_bytes(&self) -> &[u8] {
301        self.inner.as_bytes()
302    }
303
304    pub fn len(&self) -> usize {
305        self.inner.len()
306    }
307
308    pub fn is_empty(&self) -> bool {
309        self.inner.is_empty()
310    }
311}
312
313impl std::ops::Deref for SensitiveString {
314    type Target = str;
315    fn deref(&self) -> &Self::Target {
316        self.as_str()
317    }
318}
319
320impl AsRef<str> for SensitiveString {
321    fn as_ref(&self) -> &str {
322        self.as_str()
323    }
324}
325
326impl From<String> for SensitiveString {
327    fn from(s: String) -> Self {
328        Self::new(s)
329    }
330}
331
332impl From<&str> for SensitiveString {
333    fn from(s: &str) -> Self {
334        Self::new(s.to_string())
335    }
336}
337
338impl From<&String> for SensitiveString {
339    fn from(s: &String) -> Self {
340        Self::new(s.clone())
341    }
342}
343
344impl std::fmt::Display for SensitiveString {
345    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
346        write!(f, "{}", self.as_str())
347    }
348}
349
350impl std::fmt::Debug for SensitiveString {
351    /// Refuse to print the inner string. `SensitiveString` backs scan-chunk
352    /// data (`Chunk::data`), which can contain raw credential material -
353    /// decoded secrets, `.env` lines, archive-entry bytes. The previous impl
354    /// emitted `SensitiveString("<raw content>")`, leaking those bytes into
355    /// any `{:?}` print, `tracing::debug!(?chunk)` span, or panic message.
356    /// Mirror the `Credential::Debug` byte-count redaction (kimi-wave1
357    /// finding 1.1). Note: `Display` intentionally still exposes the bytes -
358    /// callers that genuinely need the content format with `{}`, which is the
359    /// auditable surface; `{:?}` must never be one.
360    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
361        write!(f, "SensitiveString(<redacted {} bytes>)", self.inner.len())
362    }
363}
364
365impl Serialize for SensitiveString {
366    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
367        self.as_str().serialize(serializer)
368    }
369}
370
371impl<'de> Deserialize<'de> for SensitiveString {
372    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
373        String::deserialize(deserializer).map(Self::new)
374    }
375}