keyhog_core/credential.rs
1//! Opaque, zeroize-on-drop credential bytes.
2//!
3//! Debt bucket (`#![allow(missing_docs)]` below): 7 items predating the crate
4//! floor raising `missing_docs` to `warn`. Remove once each carries a doc.
5//!
6//! Replaces the previous `Arc<str>` credential field with a type that:
7//!
8//! 1. Zeroes its bytes on drop (`zeroize` crate). Heap pages keyhog freed
9//! while a scan was in flight no longer leak credentials to the next
10//! allocator request, swap, or post-mortem core dump.
11//! 2. Refuses `Debug` / `Display` printing - every leak path through `{:?}`
12//! or `{}` becomes `<redacted N bytes>` instead of the bytes themselves.
//!    To get the bytes you must call `expose_secret()` explicitly, so
//!    grepping the codebase for that name audits every credential touch site.
15//! 3. Is `Clone` and serializable via `serde` (uses the `expose_secret()`
16//! bytes for `Serialize`, decodes back to a fresh `Credential` for
17//! `Deserialize`). The serialization channel is the responsibility of
18//! the caller - find emitters that go to disk/JSON and either redact
19//! them or wrap the entire output in EnvSeal seal.
20//!
21//! When EnvSeal embeds keyhog, this type is the only place credential
22//! bytes ever appear in process memory; an mlock + memfd backing can be
23//! added behind the `lockdown` feature gate without touching call sites.
24
25#![allow(missing_docs)]
26
27use serde::{Deserialize, Deserializer, Serialize, Serializer};
28use std::cmp::Ordering;
29use std::hash::{Hash, Hasher};
30use std::sync::Arc;
31use zeroize::Zeroizing;
32
33/// Opaque credential bytes. The inner `Arc<Zeroizing<Box<[u8]>>>` clones are
34/// cheap (refcount bump) but every owning `Credential` zeroizes on drop.
35/// `Arc` lets the engine intern identical credentials without copying;
36/// when the last ref drops, `Zeroizing<Box<[u8]>>` overwrites the heap
37/// allocation before `Box::drop` returns it to the allocator.
38#[derive(Clone)]
39pub struct Credential {
40 inner: Arc<Zeroizing<Box<[u8]>>>,
41}
42
43impl Credential {
44 /// Build a `Credential` from raw bytes. The bytes are copied into a
45 /// fresh `Zeroizing<Box<[u8]>>` and the input slice is unchanged
46 /// (caller is responsible for zeroizing whatever it came from).
47 #[must_use]
48 pub fn from_bytes(bytes: &[u8]) -> Self {
49 Self {
50 inner: Arc::new(Zeroizing::new(bytes.to_vec().into_boxed_slice())),
51 }
52 }
53
54 /// Build a `Credential` from a borrowed `str`. Same semantics as
55 /// `from_bytes` - bytes are copied into the zeroizing allocation.
56 /// Named `from_text` (not `from_str`) to avoid the
57 /// `clippy::should_implement_trait` lint and to keep the API
58 /// distinct from `core::str::FromStr` (which has different error
59 /// semantics - we never fail to construct a Credential).
60 #[must_use]
61 pub fn from_text(s: &str) -> Self {
62 Self::from_bytes(s.as_bytes())
63 }
64
65 /// Length in bytes.
66 #[must_use]
67 pub fn len(&self) -> usize {
68 self.inner.len()
69 }
70
71 #[must_use]
72 pub fn is_empty(&self) -> bool {
73 self.inner.is_empty()
74 }
75
76 /// Expose the underlying bytes. Every call site MUST be auditable -
77 /// `git grep expose_secret` should surface every place credentials
78 /// leave the opaque wrapper. Treat each one as a security review item.
79 ///
80 /// Returns a `&[u8]` rather than `&str` because credentials may be
81 /// non-UTF-8 (binary-encoded keys, raw private-key bytes, etc).
82 #[must_use]
83 pub fn expose_secret(&self) -> &[u8] {
84 &self.inner
85 }
86
87 /// Expose the credential as a `&str` if it's valid UTF-8, otherwise
88 /// `None`. Most production credentials ARE valid UTF-8 (provider keys,
89 /// tokens, base64) so this is the common path.
90 #[must_use]
91 pub fn expose_str(&self) -> Option<&str> {
92 std::str::from_utf8(&self.inner).ok()
93 }
94}
95
96impl From<&str> for Credential {
97 fn from(s: &str) -> Self {
98 Self::from_text(s)
99 }
100}
101
102impl From<String> for Credential {
103 fn from(s: String) -> Self {
104 // The input `String`'s buffer is dropped without zeroizing - the
105 // caller should ideally pass `&str` so the bytes never sit in a
106 // non-zeroizing `String`. We do the right thing for our own
107 // allocation either way.
108 Self::from_bytes(s.as_bytes())
109 }
110}
111
112impl From<&[u8]> for Credential {
113 fn from(b: &[u8]) -> Self {
114 Self::from_bytes(b)
115 }
116}
117
118impl From<Vec<u8>> for Credential {
119 fn from(v: Vec<u8>) -> Self {
120 Self::from_bytes(&v)
121 }
122}
123
124impl PartialEq for Credential {
125 fn eq(&self, other: &Self) -> bool {
126 // Constant-time equality. Credentials are compared during dedup
127 // and inflight de-duplication; using `==` on naked bytes leaks
128 // information through CPU branch timing in pathological cases.
129 // The cost is one extra XOR per byte vs `==`, negligible at the
130 // sizes of credentials (<1 KiB typical).
131 if self.inner.len() != other.inner.len() {
132 return false;
133 }
134 let mut diff: u8 = 0;
135 for (a, b) in self.inner.iter().zip(other.inner.iter()) {
136 diff |= a ^ b;
137 }
138 diff == 0
139 }
140}
141
142impl Eq for Credential {}
143
144impl PartialOrd for Credential {
145 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
146 Some(self.cmp(other))
147 }
148}
149
150impl Ord for Credential {
151 fn cmp(&self, other: &Self) -> Ordering {
152 self.inner
153 .as_ref()
154 .as_ref()
155 .cmp(other.inner.as_ref().as_ref())
156 }
157}
158
159impl Hash for Credential {
160 fn hash<H: Hasher>(&self, state: &mut H) {
161 self.inner.as_ref().as_ref().hash(state);
162 }
163}
164
165impl std::fmt::Debug for Credential {
166 /// Refuse to format the bytes. This is a compile-time leak guard -
167 /// every place that did `eprintln!("{:?}", cred)` or `tracing::error!(?cred)`
168 /// now prints `Credential(<redacted N bytes>)` instead of the secret.
169 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170 write!(f, "Credential(<redacted {} bytes>)", self.inner.len())
171 }
172}
173
174impl std::fmt::Display for Credential {
175 /// Same redaction as `Debug` - `format!("{}", cred)` returns the
176 /// redacted form, never the bytes.
177 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
178 write!(f, "<redacted {} bytes>", self.inner.len())
179 }
180}
181
182impl Serialize for Credential {
183 /// Serialize as a tagged JSON object so the encoding is unambiguous.
184 /// kimi-wave2 §Critical: the previous `"b64:<base64>"` string-prefix
185 /// scheme round-tripped a UTF-8 credential like `"b64:SGVsbG8="`
186 /// (a literal user-typed value) through the deserializer as if it
187 /// were base64-encoded bytes, silently corrupting it. The tagged
188 /// variant `{"text":"…"}` / `{"b64":"…"}` cannot be confused with
189 /// either form.
190 fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
191 use serde::ser::SerializeMap;
192 let mut m = serializer.serialize_map(Some(1))?;
193 match self.expose_str() {
194 Some(s) => m.serialize_entry("text", s)?,
195 None => m.serialize_entry("b64", &base64_encode(&self.inner))?,
196 }
197 m.end()
198 }
199}
200
201impl<'de> Deserialize<'de> for Credential {
202 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
203 // Accept the new tagged form (preferred) OR the legacy
204 // `b64:<base64>` / plain string forms (so on-disk artifacts
205 // from earlier versions still load). The legacy ambiguity is
206 // exactly what kimi-wave2 §Critical flagged; new writers must
207 // use the tagged form.
208 #[derive(Deserialize)]
209 #[serde(untagged)]
210 enum Wire {
211 Tagged {
212 #[serde(default)]
213 text: Option<String>,
214 #[serde(default)]
215 b64: Option<String>,
216 },
217 Legacy(String),
218 }
219 match Wire::deserialize(deserializer)? {
220 Wire::Tagged {
221 text: Some(t),
222 b64: None,
223 } => Ok(Credential::from_text(&t)),
224 Wire::Tagged {
225 text: None,
226 b64: Some(b),
227 } => {
228 let bytes = crate::encoding::decode_standard_base64(&b)
229 .map_err(serde::de::Error::custom)?;
230 Ok(Credential::from_bytes(&bytes))
231 }
232 Wire::Tagged { .. } => Err(serde::de::Error::custom(
233 "Credential must specify exactly one of `text` or `b64`",
234 )),
235 Wire::Legacy(s) => {
236 if let Some(rest) = s.strip_prefix("b64:") {
237 let bytes = crate::encoding::decode_standard_base64(rest)
238 .map_err(serde::de::Error::custom)?;
239 Ok(Credential::from_bytes(&bytes))
240 } else {
241 Ok(Credential::from_text(&s))
242 }
243 }
244 }
245 }
246}
247
/// Encode `input` as standard-alphabet base64 with `=` padding.
///
/// Hand-rolled so this module does not need a `base64` crate dependency.
/// Each group of up to three input bytes is packed into a 24-bit value
/// and emitted as four characters; positions that have no source byte
/// are written as `=`.
fn base64_encode(input: &[u8]) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const PAD: char = '=';

    // Pick the 6-bit field that starts `shift` bits up from the bottom of
    // the 24-bit group and map it through the alphabet.
    let sextet = |group: u32, shift: u32| ALPHABET[((group >> shift) & 0x3F) as usize] as char;

    let mut encoded = String::with_capacity(input.len().div_ceil(3) * 4);
    for chunk in input.chunks(3) {
        // Big-endian pack: first byte in bits 23..16, second in 15..8,
        // third in 7..0. Missing trailing bytes contribute zero bits.
        let group = chunk
            .iter()
            .enumerate()
            .fold(0u32, |acc, (i, &byte)| acc | (u32::from(byte) << (16 - 8 * i)));

        encoded.push(sextet(group, 18));
        encoded.push(sextet(group, 12));
        encoded.push(if chunk.len() > 1 { sextet(group, 6) } else { PAD });
        encoded.push(if chunk.len() > 2 { sextet(group, 0) } else { PAD });
    }
    encoded
}
271
272/// A heap-allocated string that is zeroized on drop.
273#[derive(Clone, Default)]
274pub struct SensitiveString {
275 inner: Arc<Zeroizing<String>>,
276}
277
278impl SensitiveString {
279 pub fn new(s: String) -> Self {
280 Self {
281 inner: Arc::new(Zeroizing::new(s)),
282 }
283 }
284
285 pub fn join(parts: &[SensitiveString], sep: &str) -> Self {
286 let mut s = String::new();
287 for (i, p) in parts.iter().enumerate() {
288 if i > 0 {
289 s.push_str(sep);
290 }
291 s.push_str(p.as_str());
292 }
293 Self::new(s)
294 }
295
296 pub fn as_str(&self) -> &str {
297 self.inner.as_str()
298 }
299
300 pub fn as_bytes(&self) -> &[u8] {
301 self.inner.as_bytes()
302 }
303
304 pub fn len(&self) -> usize {
305 self.inner.len()
306 }
307
308 pub fn is_empty(&self) -> bool {
309 self.inner.is_empty()
310 }
311}
312
313impl std::ops::Deref for SensitiveString {
314 type Target = str;
315 fn deref(&self) -> &Self::Target {
316 self.as_str()
317 }
318}
319
320impl AsRef<str> for SensitiveString {
321 fn as_ref(&self) -> &str {
322 self.as_str()
323 }
324}
325
326impl From<String> for SensitiveString {
327 fn from(s: String) -> Self {
328 Self::new(s)
329 }
330}
331
332impl From<&str> for SensitiveString {
333 fn from(s: &str) -> Self {
334 Self::new(s.to_string())
335 }
336}
337
338impl From<&String> for SensitiveString {
339 fn from(s: &String) -> Self {
340 Self::new(s.clone())
341 }
342}
343
344impl std::fmt::Display for SensitiveString {
345 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
346 write!(f, "{}", self.as_str())
347 }
348}
349
350impl std::fmt::Debug for SensitiveString {
351 /// Refuse to print the inner string. `SensitiveString` backs scan-chunk
352 /// data (`Chunk::data`), which can contain raw credential material -
353 /// decoded secrets, `.env` lines, archive-entry bytes. The previous impl
354 /// emitted `SensitiveString("<raw content>")`, leaking those bytes into
355 /// any `{:?}` print, `tracing::debug!(?chunk)` span, or panic message.
356 /// Mirror the `Credential::Debug` byte-count redaction (kimi-wave1
357 /// finding 1.1). Note: `Display` intentionally still exposes the bytes -
358 /// callers that genuinely need the content format with `{}`, which is the
359 /// auditable surface; `{:?}` must never be one.
360 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
361 write!(f, "SensitiveString(<redacted {} bytes>)", self.inner.len())
362 }
363}
364
365impl Serialize for SensitiveString {
366 fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
367 self.as_str().serialize(serializer)
368 }
369}
370
371impl<'de> Deserialize<'de> for SensitiveString {
372 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
373 String::deserialize(deserializer).map(Self::new)
374 }
375}