devboy_skills/trace/redact.rs
1//! Redaction of sensitive values before traces hit disk.
2//!
3//! Two mechanisms are layered:
4//!
//! 1. Known credential shapes are masked regardless of where they
//!    appear in the tree. Currently: `ghp_`, `glpat-`, `pk_`, `sk-`,
//!    `xoxb-` / `xoxa-` / `xapp-`, `Bearer ` / `Basic ` (case-
//!    insensitive), plus a few other common prefixes. These are all
//!    detected without knowing the configured credential set — useful
//!    when a token leaks into an error message, a git URL, or a
//!    user-supplied prompt.
//! 2. Values of environment variables whose name looks sensitive
//!    (`*_TOKEN` / `*_SECRET` / `*_KEY` / `*_PASSWORD` /
//!    `*_PASSPHRASE` / `AUTHORIZATION` / `COOKIE`) are masked whenever
//!    a string in the trace is exactly equal to one of them — the
//!    redactor snapshots those values when it is created.
16//!
17//! Setting the `DEVBOY_TRACE_REDACTION=off` environment variable
18//! disables both passes for local debugging. Never default to off.
19//!
20//! ## Amortizing the env snapshot
21//!
22//! The top-level [`sanitize`] helper walks `std::env::vars()` on every
23//! call — fine for one-shot CLI invocations but wasteful inside a
24//! long-running producer like [`super::SessionTracer`] that writes
25//! many events. Build a [`Redactor`] once with
26//! [`Redactor::snapshot`] and reuse it for every event in the same
27//! session to pay the env scan just once.
28
29use std::collections::HashSet;
30
31use serde_json::Value;
32
33/// Redact sensitive data in `value`. Recursively walks maps and
34/// arrays. Strings are rewritten; numbers / bools / null pass through
35/// unchanged.
36///
37/// Each call snapshots `*_TOKEN` / `*_SECRET` / … env vars afresh so
38/// that tests using `temp_env::with_var` (and production callers that
39/// legitimately mutate the environment) see up-to-date state. Inside
40/// a long session, prefer [`Redactor::snapshot`] + [`Redactor::sanitize`].
41pub fn sanitize(value: Value) -> Value {
42 Redactor::snapshot().sanitize(value)
43}
44
/// Reusable redaction state: one environment snapshot plus the on/off
/// switch.
///
/// Obtain one through [`Redactor::snapshot`] and keep it for the
/// lifetime of a long-running producer (for example one per
/// `SessionTracer`) so the environment is not rescanned for every
/// event.
#[derive(Clone, Debug)]
pub struct Redactor {
    /// `false` when redaction was switched off at snapshot time; values
    /// then pass through unmodified.
    enabled: bool,
    /// Values of the sensitive-looking environment variables captured
    /// at snapshot time. Left empty when redaction is disabled.
    secrets: HashSet<String>,
}
54
55impl Redactor {
56 /// Capture the current set of sensitive env-var values and the
57 /// `DEVBOY_TRACE_REDACTION=off` opt-out state. Cheap to clone.
58 pub fn snapshot() -> Self {
59 if redaction_disabled() {
60 Self {
61 enabled: false,
62 secrets: HashSet::new(),
63 }
64 } else {
65 Self {
66 enabled: true,
67 secrets: known_env_secrets(),
68 }
69 }
70 }
71
72 /// Sanitize a single value using the captured env-var snapshot.
73 pub fn sanitize(&self, value: Value) -> Value {
74 if !self.enabled {
75 return value;
76 }
77 sanitize_with(&self.secrets, value)
78 }
79}
80
/// Reports whether the `DEVBOY_TRACE_REDACTION` environment variable
/// asks for redaction to be switched off.
///
/// The value is lower-cased and compared against `off`, `0`, `false`
/// and `no`. An unset or unreadable variable, and any other value,
/// leave redaction enabled.
fn redaction_disabled() -> bool {
    const OFF_VALUES: [&str; 4] = ["off", "0", "false", "no"];
    std::env::var("DEVBOY_TRACE_REDACTION")
        .map(|raw| OFF_VALUES.contains(&raw.to_lowercase().as_str()))
        .unwrap_or(false)
}
87
88fn sanitize_with(secrets: &HashSet<String>, value: Value) -> Value {
89 match value {
90 Value::String(s) => Value::String(redact_string(secrets, &s)),
91 Value::Array(xs) => {
92 Value::Array(xs.into_iter().map(|x| sanitize_with(secrets, x)).collect())
93 }
94 Value::Object(map) => {
95 let mut out = serde_json::Map::with_capacity(map.len());
96 for (k, v) in map {
97 // If the key itself hints at a secret, redact the whole
98 // value regardless of its type. This prevents structured
99 // leaks like `{"authorization": {"scheme": "Bearer",
100 // "value": "…"}}` where nested field names may not
101 // themselves trip the secret-key heuristic.
102 let new_val = if key_looks_secret(&k) {
103 Value::String("<redacted:secret-field>".to_string())
104 } else {
105 sanitize_with(secrets, v)
106 };
107 out.insert(k, new_val);
108 }
109 Value::Object(out)
110 }
111 other => other,
112 }
113}
114
115fn redact_string(secrets: &HashSet<String>, s: &str) -> String {
116 // 1. Exact env-var match.
117 if !s.is_empty() && secrets.contains(s) {
118 return "<redacted:credential>".to_string();
119 }
120 // 2. Known token prefixes. We search case-sensitively because every
121 // supported prefix is case-sensitive in practice.
122 if has_known_prefix(s) {
123 return "<redacted:token-pattern>".to_string();
124 }
125 // 3. Bearer / Basic schemes embedded inside a larger string. Don't
126 // rewrite the whole string — replace only the credential segment.
127 if let Some(rewritten) = mask_auth_header_segment(s) {
128 return rewritten;
129 }
130 s.to_string()
131}
132
/// Returns `true` when `s` begins with a known credential prefix and
/// is long enough to plausibly carry a credential.
///
/// Provider token prefixes are matched case-sensitively so ordinary
/// words that merely share the letters (say, a sentence starting with
/// "Ghp") are left alone. The HTTP auth schemes `bearer ` / `basic `
/// are matched ASCII-case-insensitively, because scheme names are
/// case-insensitive per RFC 7235.
///
/// In both cases more than eight bytes must follow the prefix, which
/// keeps bare prefixes such as `ghp_` or `pk_` (e.g. in
/// documentation) untouched.
fn has_known_prefix(s: &str) -> bool {
    /// Number of bytes after the prefix that must be exceeded.
    const MIN_TAIL_LEN: usize = 8;
    const PROVIDER_PREFIXES: &[&str] = &[
        // GitHub PATs
        "ghp_",
        "github_pat_",
        "gho_",
        "ghu_",
        "ghs_",
        "ghr_",
        // GitLab PATs
        "glpat-",
        // Generic publishable / secret key families shared by several
        // providers (Stripe, ClickUp, …); ADR-015 treats them as one
        // `pk_` / `sk_` group.
        "pk_",
        "sk_",
        // OpenAI-style keys; `sk-` also covers `sk-ant-…`.
        "sk-",
        // Slack
        "xoxb-",
        "xoxa-",
        "xoxp-",
        "xapp-",
    ];
    const AUTH_SCHEMES: &[&str] = &["bearer ", "basic "];

    let long_enough = |prefix: &str| s.len() > prefix.len() + MIN_TAIL_LEN;

    let provider_hit = PROVIDER_PREFIXES
        .iter()
        .any(|&prefix| long_enough(prefix) && s.starts_with(prefix));

    // The length guard runs first, so slicing the leading bytes below
    // cannot go out of bounds.
    provider_hit
        || AUTH_SCHEMES.iter().any(|&scheme| {
            long_enough(scheme)
                && s.as_bytes()[..scheme.len()].eq_ignore_ascii_case(scheme.as_bytes())
        })
}
176
/// Masks every `Bearer ` / `Basic ` credential embedded in `s`, e.g.
/// `"Authorization: Bearer ghp_…"` inside a log line.
///
/// Scheme names are located ASCII-case-insensitively (HTTP auth schemes
/// are case-insensitive per RFC 7235) and the original casing of the
/// scheme is kept in the output. A credential runs from the first
/// non-space byte after the scheme up to the next whitespace, comma or
/// semicolon, and is replaced by `<redacted:auth>` when it is at least
/// eight bytes long. Shorter runs are treated as ordinary prose
/// ("the bearer is x") and left untouched.
///
/// Every occurrence of either scheme is examined, so a harmless early
/// match cannot shield a real credential later in the string, and
/// several credentials in one string are all masked.
///
/// Returns `None` when nothing was masked, so the caller can keep the
/// original string.
fn mask_auth_header_segment(s: &str) -> Option<String> {
    const SCHEMES: [&str; 2] = ["bearer ", "basic "];
    const MIN_CREDENTIAL_LEN: usize = 8;
    const MARKER: &str = "<redacted:auth>";

    // ASCII lower-casing keeps byte offsets identical, so indices found
    // in `lower` are valid (and on char boundaries) in `s`.
    let lower = s.to_ascii_lowercase();
    let mut out = String::with_capacity(s.len());
    let mut copied = 0; // bytes of `s` already emitted into `out`
    let mut cursor = 0; // where the next scheme search starts
    let mut masked = false;

    // Always handle the earliest remaining scheme occurrence first so
    // the output is assembled strictly left to right.
    while let Some((scheme_at, scheme_len)) = SCHEMES
        .iter()
        .filter_map(|scheme| {
            lower[cursor..]
                .find(*scheme)
                .map(|offset| (cursor + offset, scheme.len()))
        })
        .min()
    {
        // RFC 7235 allows one or more spaces between the scheme and the
        // credentials; skip the extra ones so they cannot hide a token.
        let after_scheme = &s[scheme_at + scheme_len..];
        let padding = after_scheme.len() - after_scheme.trim_start_matches(' ').len();
        let cred_start = scheme_at + scheme_len + padding;

        let credential = &s[cred_start..];
        let cred_len = credential
            .find(|c: char| c.is_whitespace() || c == ',' || c == ';')
            .unwrap_or(credential.len());

        if cred_len >= MIN_CREDENTIAL_LEN {
            out.push_str(&s[copied..cred_start]);
            out.push_str(MARKER);
            copied = cred_start + cred_len;
            cursor = copied;
            masked = true;
        } else {
            // Too short to be a credential; resume right after the
            // scheme so a scheme word directly following it (as in
            // "bearer basic <token>") is still considered.
            cursor = cred_start;
        }
    }

    if !masked {
        return None;
    }
    out.push_str(&s[copied..]);
    Some(out)
}
201
/// Heuristically decides whether `key` — a JSON object key or an
/// environment variable name — names a secret.
///
/// Matching is ASCII-case-insensitive. A key is considered secret when
/// it
///
/// * is exactly `AUTHORIZATION` or `COOKIE`,
/// * ends in `_KEY` or `_AUTH`, or
/// * contains `PASSWORD`, `PASSPHRASE`, `SECRET` or `TOKEN` anywhere.
///
/// The substring rule subsumes the `*_TOKEN` / `*_SECRET` /
/// `*_PASSWORD` / `*_PASSPHRASE` suffixes as well as the bare names,
/// and also catches camel-case keys such as `accessToken` or
/// `sshPassphrase`. `KEY` and `AUTH` are deliberately matched only as
/// `_`-separated suffixes, because as substrings they would hit
/// ordinary words (`monkey`, `author`).
fn key_looks_secret(key: &str) -> bool {
    const EXACT: [&str; 2] = ["AUTHORIZATION", "COOKIE"];
    const SUFFIXES: [&str; 2] = ["_KEY", "_AUTH"];
    // `PASSPHRASE` is handled like its siblings so a bare `passphrase`
    // field is covered, not only `*_PASSPHRASE`.
    const FRAGMENTS: [&str; 4] = ["PASSWORD", "PASSPHRASE", "SECRET", "TOKEN"];

    let upper = key.to_ascii_uppercase();
    let name = upper.as_str();

    EXACT.iter().any(|&exact| exact == name)
        || SUFFIXES.iter().any(|&suffix| name.ends_with(suffix))
        || FRAGMENTS.iter().any(|&fragment| name.contains(fragment))
}
228
229fn known_env_secrets() -> HashSet<String> {
230 let mut out = HashSet::new();
231 for (name, value) in std::env::vars() {
232 if value.is_empty() {
233 continue;
234 }
235 if key_looks_secret(&name) {
236 out.insert(value);
237 }
238 }
239 out
240}
241
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::sync::{Mutex, MutexGuard};

    /// Name of the opt-out switch read by the redactor.
    const SWITCH: &str = "DEVBOY_TRACE_REDACTION";

    /// A GitHub-PAT-shaped value used by several tests.
    const SAMPLE_PAT: &str = "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";

    /// Serialises every test in this module around the process-wide
    /// environment. Some tests set `DEVBOY_TRACE_REDACTION=off` through
    /// `temp_env`, and `cargo test` runs tests concurrently; without
    /// this lock that setting could be visible to an unrelated test
    /// while it is in place and make it flake. Together with
    /// `temp_env`'s own save/restore this gives the module a
    /// deterministic environment.
    static ENV_TEST_MUTEX: Mutex<()> = Mutex::new(());

    /// Takes the module-wide environment lock. A poisoned lock (an
    /// earlier test panicked while holding it) is recovered rather
    /// than propagated, so one failure does not cascade.
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_TEST_MUTEX
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Runs `f` under the environment lock with
    /// `DEVBOY_TRACE_REDACTION` explicitly unset, i.e. with the default
    /// (enabled) redactor. Used by every test that expects redaction
    /// to be active.
    fn with_clean_env<R>(f: impl FnOnce() -> R) -> R {
        let _env = lock_env();
        temp_env::with_var(SWITCH, None::<&str>, f)
    }

    #[test]
    fn masks_github_pat() {
        with_clean_env(|| {
            let redacted = sanitize(json!({ "args": { "token": SAMPLE_PAT } }));
            let rendered = serde_json::to_string(&redacted).unwrap();
            assert!(!rendered.contains("ghp_aaaaaaaa"));
            assert!(rendered.contains("<redacted"));
        });
    }

    #[test]
    fn masks_bearer_scheme_in_header_string() {
        with_clean_env(|| {
            let out = sanitize(json!("Authorization: Bearer xxxxxxxxxxxxyyyyyyyyyyyy"));
            let s = out.as_str().unwrap();
            assert!(!s.contains("xxxxxxxxxxxxyyyyyyyyyyyy"), "got: {s}");
            assert!(s.contains("<redacted"), "got: {s}");
        });
    }

    #[test]
    fn masks_by_key_name_even_when_value_looks_harmless() {
        with_clean_env(|| {
            // The value matches no known prefix, but it sits under a
            // key named `password`, so it must be redacted anyway.
            let out = sanitize(json!({ "password": "not-a-prefix" }));
            assert_eq!(out["password"].as_str(), Some("<redacted:secret-field>"));
        });
    }

    #[test]
    fn env_var_values_are_redacted_when_they_match_exactly() {
        let _env = lock_env();
        let env = [
            (SWITCH, None::<&str>),
            (
                "DEVBOY_TEST_TOKEN",
                Some("super-secret-value-nothing-matches"),
            ),
        ];
        temp_env::with_vars(env, || {
            // The env-var replacement only fires when the string IS
            // the secret. A secret embedded in a longer string is the
            // DLP case the redactor does not attempt to solve.
            let embedded =
                sanitize(json!({ "note": "leaked: super-secret-value-nothing-matches" }));
            let note = embedded["note"].as_str().unwrap();
            assert_eq!(note, "leaked: super-secret-value-nothing-matches");

            let exact = sanitize(json!({ "raw": "super-secret-value-nothing-matches" }));
            assert_eq!(exact["raw"].as_str(), Some("<redacted:credential>"));
        });
    }

    #[test]
    fn short_strings_are_not_redacted_by_prefix_check() {
        with_clean_env(|| {
            // A bare `ghp_` must survive — only long PAT-shaped
            // strings are redacted. This matters for documentation
            // and for the redaction marker itself.
            let out = sanitize(json!("ghp_"));
            assert_eq!(out.as_str(), Some("ghp_"));
        });
    }

    #[test]
    fn redaction_can_be_disabled_via_env() {
        let _env = lock_env();
        temp_env::with_var(SWITCH, Some("off"), || {
            let input = json!({ "token": SAMPLE_PAT });
            let out = sanitize(input.clone());
            assert_eq!(out, input);
        });
    }

    #[test]
    fn masks_bearer_scheme_case_insensitive() {
        with_clean_env(|| {
            // HTTP schemes are case-insensitive per RFC 7235, so every
            // one of these spellings must redact.
            let headers = [
                "Authorization: Bearer xxxxxxxxxxxxyyyyyyyyyyyy",
                "authorization: bearer xxxxxxxxxxxxyyyyyyyyyyyy",
                "AUTHORIZATION: BEARER xxxxxxxxxxxxyyyyyyyyyyyy",
                "authorization: BeArEr xxxxxxxxxxxxyyyyyyyyyyyy",
            ];
            for header in headers {
                let out = sanitize(json!(header));
                let s = out.as_str().unwrap();
                assert!(
                    !s.contains("xxxxxxxxxxxxyyyyyyyyyyyy"),
                    "token leaked for header `{header}` → `{s}`"
                );
                assert!(
                    s.contains("<redacted"),
                    "no redaction marker for header `{header}` → `{s}`"
                );
            }
        });
    }

    #[test]
    fn masks_bare_bearer_value_case_insensitive() {
        with_clean_env(|| {
            // A standalone `Bearer <token>` value is caught by the
            // prefix check rather than the header scanner; that path
            // must be case-insensitive too.
            let values = [
                "Bearer abcdefghijklmnopqrstuvwx",
                "bearer abcdefghijklmnopqrstuvwx",
                "BEARER abcdefghijklmnopqrstuvwx",
                "Basic YWxpY2U6aHVudGVyMjpkcmFnb24=",
            ];
            for raw in values {
                let out = sanitize(json!(raw));
                let s = out.as_str().unwrap();
                assert!(s.contains("<redacted"), "not redacted: `{raw}` → `{s}`");
            }
        });
    }

    #[test]
    fn masks_generic_pk_prefix() {
        with_clean_env(|| {
            // ADR-015 calls out a generic `pk_` prefix (not only
            // `pk_live_` / `pk_test_`). The value carries enough bytes
            // after the prefix to clear the length guard.
            let out = sanitize(json!({ "clickup_pk": "pk_abcdefghijklmnop" }));
            assert_eq!(
                out["clickup_pk"].as_str(),
                Some("<redacted:token-pattern>"),
                "generic pk_ prefix should redact"
            );

            // A short `pk_` literal (e.g. in docs) stays untouched.
            let doc = sanitize(json!("pk_"));
            assert_eq!(doc.as_str(), Some("pk_"));
        });
    }

    #[test]
    fn redactor_snapshot_amortizes_env_scan() {
        let _env = lock_env();
        let env = [
            (SWITCH, None::<&str>),
            ("DEVBOY_REDACTOR_CACHE_TOKEN", Some("cached-token-zzzzzzzz")),
        ];
        let redactor = temp_env::with_vars(env, Redactor::snapshot);
        // The variable no longer exists here, yet the snapshot still
        // remembers its value.
        let out = redactor.sanitize(json!({ "raw": "cached-token-zzzzzzzz" }));
        assert_eq!(out["raw"].as_str(), Some("<redacted:credential>"));
    }

    #[test]
    fn redactor_snapshot_respects_disable_env() {
        let _env = lock_env();
        temp_env::with_var(SWITCH, Some("off"), || {
            let redactor = Redactor::snapshot();
            let input = json!({ "token": SAMPLE_PAT });
            assert_eq!(redactor.sanitize(input.clone()), input);
        });
    }
}