Skip to main content

devboy_skills/trace/
redact.rs

1//! Redaction of sensitive values before traces hit disk.
2//!
3//! Two mechanisms are layered:
4//!
5//! 1. Known credential shapes are masked regardless of where they
6//!    appear in the tree. Currently: `ghp_`, `glpat-`, `pk_`, `sk-`,
7//!    `xoxb-` / `xoxa-` / `xapp-`, `Bearer ` / `Basic ` (case-
//!    insensitive), plus a few other common prefixes. These are all
//!    detected without knowing the configured credential set — useful
//!    when a token leaks into an error message, a git URL, or a
//!    user-supplied prompt.
12//! 2. Values of any string-valued environment variable whose name
13//!    matches a sensitive suffix (`*_TOKEN` / `*_SECRET` / `*_KEY` /
14//!    `*_PASSWORD` / `*_PASSPHRASE` / `AUTHORIZATION` / `COOKIE`) are
15//!    masked — the redactor snapshots those at call time.
16//!
//! Setting the `DEVBOY_TRACE_REDACTION=off` environment variable
//! (`0`, `false` and `no` are accepted as well, case-insensitively)
//! disables both passes for local debugging. Never default to off.
19//!
20//! ## Amortizing the env snapshot
21//!
22//! The top-level [`sanitize`] helper walks `std::env::vars()` on every
23//! call — fine for one-shot CLI invocations but wasteful inside a
24//! long-running producer like [`super::SessionTracer`] that writes
25//! many events. Build a [`Redactor`] once with
26//! [`Redactor::snapshot`] and reuse it for every event in the same
27//! session to pay the env scan just once.
28
29use std::collections::HashSet;
30
31use serde_json::Value;
32
33/// Redact sensitive data in `value`. Recursively walks maps and
34/// arrays. Strings are rewritten; numbers / bools / null pass through
35/// unchanged.
36///
37/// Each call snapshots `*_TOKEN` / `*_SECRET` / … env vars afresh so
38/// that tests using `temp_env::with_var` (and production callers that
39/// legitimately mutate the environment) see up-to-date state. Inside
40/// a long session, prefer [`Redactor::snapshot`] + [`Redactor::sanitize`].
41pub fn sanitize(value: Value) -> Value {
42    Redactor::snapshot().sanitize(value)
43}
44
/// A reusable redactor that holds one env-var snapshot. Created via
/// [`Redactor::snapshot`]; use once per long-running producer (e.g.
/// one per `SessionTracer`) to avoid rescanning the environment on
/// every event.
///
/// The snapshot is immutable: environment changes made after
/// construction are not observed by an existing `Redactor`.
#[derive(Debug, Clone)]
pub struct Redactor {
    /// `false` when the `DEVBOY_TRACE_REDACTION` opt-out was active at
    /// snapshot time; `sanitize` then returns values untouched.
    enabled: bool,
    /// Values of the environment variables whose *name* looked
    /// sensitive at snapshot time. Strings equal to one of these are
    /// masked. Empty when redaction is disabled.
    secrets: HashSet<String>,
}
54
55impl Redactor {
56    /// Capture the current set of sensitive env-var values and the
57    /// `DEVBOY_TRACE_REDACTION=off` opt-out state. Cheap to clone.
58    pub fn snapshot() -> Self {
59        if redaction_disabled() {
60            Self {
61                enabled: false,
62                secrets: HashSet::new(),
63            }
64        } else {
65            Self {
66                enabled: true,
67                secrets: known_env_secrets(),
68            }
69        }
70    }
71
72    /// Sanitize a single value using the captured env-var snapshot.
73    pub fn sanitize(&self, value: Value) -> Value {
74        if !self.enabled {
75            return value;
76        }
77        sanitize_with(&self.secrets, value)
78    }
79}
80
/// Report whether the `DEVBOY_TRACE_REDACTION` environment variable
/// requests that redaction be turned off.
///
/// The value is compared case-insensitively against `off`, `0`,
/// `false` and `no`. An unset or unreadable variable (including one
/// that is not valid Unicode) leaves redaction enabled.
fn redaction_disabled() -> bool {
    std::env::var("DEVBOY_TRACE_REDACTION")
        .map(|raw| {
            let normalized = raw.to_lowercase();
            ["off", "0", "false", "no"].contains(&normalized.as_str())
        })
        .unwrap_or(false)
}
87
88fn sanitize_with(secrets: &HashSet<String>, value: Value) -> Value {
89    match value {
90        Value::String(s) => Value::String(redact_string(secrets, &s)),
91        Value::Array(xs) => {
92            Value::Array(xs.into_iter().map(|x| sanitize_with(secrets, x)).collect())
93        }
94        Value::Object(map) => {
95            let mut out = serde_json::Map::with_capacity(map.len());
96            for (k, v) in map {
97                // If the key itself hints at a secret, redact the whole
98                // value regardless of its type. This prevents structured
99                // leaks like `{"authorization": {"scheme": "Bearer",
100                // "value": "…"}}` where nested field names may not
101                // themselves trip the secret-key heuristic.
102                let new_val = if key_looks_secret(&k) {
103                    Value::String("<redacted:secret-field>".to_string())
104                } else {
105                    sanitize_with(secrets, v)
106                };
107                out.insert(k, new_val);
108            }
109            Value::Object(out)
110        }
111        other => other,
112    }
113}
114
115fn redact_string(secrets: &HashSet<String>, s: &str) -> String {
116    // 1. Exact env-var match.
117    if !s.is_empty() && secrets.contains(s) {
118        return "<redacted:credential>".to_string();
119    }
120    // 2. Known token prefixes. We search case-sensitively because every
121    //    supported prefix is case-sensitive in practice.
122    if has_known_prefix(s) {
123        return "<redacted:token-pattern>".to_string();
124    }
125    // 3. Bearer / Basic schemes embedded inside a larger string. Don't
126    //    rewrite the whole string — replace only the credential segment.
127    if let Some(rewritten) = mask_auth_header_segment(s) {
128        return rewritten;
129    }
130    s.to_string()
131}
132
/// Report whether `s` *starts with* a known credential prefix and is
/// long enough to plausibly be a real credential.
///
/// "Long enough" means strictly more than 8 bytes after the prefix,
/// so bare literals such as `ghp_` or `pk_` (e.g. in documentation)
/// are left alone.
fn has_known_prefix(s: &str) -> bool {
    /// Bytes that must follow a prefix (exclusive lower bound).
    const MIN_PAYLOAD: usize = 8;

    // Provider token prefixes, matched case-sensitively so ordinary
    // words that merely share the letters (e.g. a sentence starting
    // with "Ghp") are not redacted.
    const CASE_SENSITIVE: &[&str] = &[
        // GitHub PATs
        "ghp_",
        "github_pat_",
        "gho_",
        "ghu_",
        "ghs_",
        "ghr_",
        // GitLab PATs
        "glpat-",
        // Publishable / secret key families shared across a few
        // providers (Stripe, ClickUp, etc.). ADR-015 spec calls these
        // out as a single `pk_` / `sk_` group — keep them generic.
        "pk_",
        "sk_",
        // OpenAI-ish (also covers sk-ant-… via the `sk-` prefix).
        "sk-",
        // Slack
        "xoxb-",
        "xoxa-",
        "xoxp-",
        "xapp-",
    ];

    // HTTP auth-scheme prefixes. Scheme tokens are case-insensitive
    // per RFC 7235, so `Bearer <tok>`, `bearer <tok>` and
    // `BEARER <tok>` must all match.
    const SCHEME_CI: &[&str] = &["bearer ", "basic "];

    let token_shaped = CASE_SENSITIVE
        .iter()
        .copied()
        .any(|prefix| s.len() > prefix.len() + MIN_PAYLOAD && s.starts_with(prefix));
    if token_shaped {
        return true;
    }

    SCHEME_CI.iter().copied().any(|scheme| {
        // Compare only the leading slice. `get` yields `None` when `s`
        // is too short or the cut falls inside a multi-byte character,
        // and neither case can be an ASCII scheme prefix.
        let starts_with_scheme = s
            .get(..scheme.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(scheme));
        starts_with_scheme && s.len() > scheme.len() + MIN_PAYLOAD
    })
}
176
/// Mask every `Bearer <credential>` / `Basic <credential>` segment
/// embedded in `s`, e.g. `"Authorization: Bearer ghp_…"` inside a log
/// line.
///
/// The scheme word is matched case-insensitively (HTTP auth schemes
/// are case-insensitive per RFC 7235) and is kept in its original
/// casing. Only the credential is replaced, by `<redacted:auth>`. A
/// credential runs until the next whitespace, comma or semicolon and
/// must be at least 8 bytes long to count, so ordinary prose such as
/// "the bearer of news" is not rewritten.
///
/// All occurrences are examined. A harmless early match (a scheme
/// word followed by a short word) does not prevent a real credential
/// later in the string from being masked, and several credentials in
/// one string are all masked.
///
/// Returns `None` when nothing was masked, so the caller can keep the
/// original string.
fn mask_auth_header_segment(s: &str) -> Option<String> {
    const NEEDLES: [&str; 2] = ["bearer ", "basic "];
    const MIN_CREDENTIAL_LEN: usize = 8;
    const MARKER: &str = "<redacted:auth>";

    // ASCII lowercasing keeps byte offsets and char boundaries intact,
    // so indices found in `lower` are valid for slicing `s`.
    let lower = s.to_ascii_lowercase();
    let mut out = String::with_capacity(s.len());
    let mut cursor = 0;
    let mut masked_any = false;

    loop {
        // Earliest scheme word at or after `cursor`, whichever needle.
        let next = NEEDLES
            .iter()
            .filter_map(|needle| {
                lower[cursor..]
                    .find(*needle)
                    .map(|rel| (cursor + rel, needle.len()))
            })
            .min_by_key(|&(idx, _)| idx);
        let (idx, needle_len) = match next {
            Some(found) => found,
            None => break,
        };

        let cred_start = idx + needle_len;
        let rest = &s[cred_start..];
        let cred_len = rest
            .find(|c: char| c.is_whitespace() || c == ',' || c == ';')
            .unwrap_or(rest.len());

        // Copy everything up to and including the scheme word verbatim
        // (original casing preserved).
        out.push_str(&s[cursor..cred_start]);
        if cred_len >= MIN_CREDENTIAL_LEN {
            out.push_str(MARKER);
            cursor = cred_start + cred_len;
            masked_any = true;
        } else {
            // Too short to be a credential: keep it and continue the
            // scan right after the scheme word.
            cursor = cred_start;
        }
    }

    if !masked_any {
        return None;
    }
    out.push_str(&s[cursor..]);
    Some(out)
}
201
/// Heuristic: does a field or env-var name suggest that its value is
/// a secret?
///
/// The name is ASCII-upper-cased first, so matching is
/// case-insensitive. It is considered secret when it
///
/// * equals one of the `EXACT` names,
/// * ends with one of the `SUFFIXES`, or
/// * contains `PASSWORD`, `SECRET` or `TOKEN` anywhere (this also
///   catches names like `token_count`).
fn key_looks_secret(key: &str) -> bool {
    const EXACT: &[&str] = &["AUTHORIZATION", "COOKIE", "TOKEN", "SECRET", "PASSWORD"];
    const SUFFIXES: &[&str] = &[
        "_TOKEN",
        "_SECRET",
        "_KEY",
        "_PASSWORD",
        "_PASSPHRASE",
        "_AUTH",
    ];
    // Common devboy conventions: these fragments mark a key as secret
    // wherever they appear in the name.
    const FRAGMENTS: &[&str] = &["PASSWORD", "SECRET", "TOKEN"];

    let normalized = key.to_ascii_uppercase();
    let name = normalized.as_str();

    EXACT.iter().any(|exact| *exact == name)
        || SUFFIXES.iter().any(|suffix| name.ends_with(suffix))
        || FRAGMENTS.iter().any(|fragment| name.contains(fragment))
}
228
229fn known_env_secrets() -> HashSet<String> {
230    let mut out = HashSet::new();
231    for (name, value) in std::env::vars() {
232        if value.is_empty() {
233            continue;
234        }
235        if key_looks_secret(&name) {
236            out.insert(value);
237        }
238    }
239    out
240}
241
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::sync::Mutex;

    /// Serialise every test in this module around the process-wide
    /// environment. Some tests legitimately toggle
    /// `DEVBOY_TRACE_REDACTION=off` via `temp_env::with_var`, and
    /// `cargo test` runs the others concurrently — without this mutex
    /// a sibling test's `off` setting can leak into an unrelated test
    /// for the window it holds the var, making
    /// `masks_bare_bearer_value_case_insensitive` and friends flake
    /// on CI. The mutex is cheap (only contended during tests) and
    /// keeps the production code path zero-overhead. Combined with
    /// `temp_env::with_var`'s own save/restore logic this gives the
    /// whole module deterministic env state.
    static ENV_TEST_MUTEX: Mutex<()> = Mutex::new(());

    /// Helper: acquire the module-wide env-serialisation lock and run
    /// `f` inside a `temp_env` guard that explicitly UNsets
    /// `DEVBOY_TRACE_REDACTION`. Used by the tests that expect the
    /// default (enabled) redactor and need no other env vars. Without
    /// this wrapper a sibling test's `DEVBOY_TRACE_REDACTION=off`
    /// setting could race in and silently disable redaction
    /// mid-assertion.
    ///
    /// A poisoned mutex (a previous test panicked while holding it) is
    /// recovered with `into_inner` so one failure does not cascade.
    fn with_clean_env<R>(f: impl FnOnce() -> R) -> R {
        let _guard = ENV_TEST_MUTEX.lock().unwrap_or_else(|p| p.into_inner());
        temp_env::with_var("DEVBOY_TRACE_REDACTION", None::<&str>, f)
    }

    /// A GitHub PAT nested inside an object must not survive
    /// serialisation.
    #[test]
    fn masks_github_pat() {
        with_clean_env(|| {
            let v = json!({ "args": { "token": "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" } });
            let out = sanitize(v);
            let s = serde_json::to_string(&out).unwrap();
            assert!(!s.contains("ghp_aaaaaaaa"));
            assert!(s.contains("<redacted"));
        });
    }

    /// A credential embedded after `Bearer ` inside a header-like
    /// string is masked.
    #[test]
    fn masks_bearer_scheme_in_header_string() {
        with_clean_env(|| {
            let v = json!("Authorization: Bearer xxxxxxxxxxxxyyyyyyyyyyyy");
            let out = sanitize(v);
            let s = out.as_str().unwrap();
            assert!(!s.contains("xxxxxxxxxxxxyyyyyyyyyyyy"), "got: {s}");
            assert!(s.contains("<redacted"), "got: {s}");
        });
    }

    /// The key-name heuristic redacts regardless of the value's shape.
    #[test]
    fn masks_by_key_name_even_when_value_looks_harmless() {
        with_clean_env(|| {
            // A value that does not match any known prefix but lives under
            // a key called `password` must still be redacted.
            let v = json!({ "password": "not-a-prefix" });
            let out = sanitize(v);
            assert_eq!(
                out.get("password").and_then(|v| v.as_str()),
                Some("<redacted:secret-field>")
            );
        });
    }

    /// Env-var secrets are masked on exact match only; the same value
    /// embedded in a longer string is left alone.
    #[test]
    fn env_var_values_are_redacted_when_they_match_exactly() {
        // Takes the lock directly (instead of `with_clean_env`) because
        // it needs to set a second variable in the same guard.
        let _guard = ENV_TEST_MUTEX.lock().unwrap_or_else(|p| p.into_inner());
        temp_env::with_vars(
            [
                ("DEVBOY_TRACE_REDACTION", None::<&str>),
                (
                    "DEVBOY_TEST_TOKEN",
                    Some("super-secret-value-nothing-matches"),
                ),
            ],
            || {
                let v = json!({ "note": "leaked: super-secret-value-nothing-matches" });
                let out = sanitize(v);
                // The exact-match secret replacement only fires when
                // the value IS the secret — not when it's embedded in
                // a larger string. Embedded leakage is a DLP problem
                // this module does not attempt to solve.
                let note = out.get("note").and_then(|v| v.as_str()).unwrap();
                assert_eq!(note, "leaked: super-secret-value-nothing-matches");

                let v = json!({ "raw": "super-secret-value-nothing-matches" });
                let out = sanitize(v);
                assert_eq!(
                    out.get("raw").and_then(|v| v.as_str()),
                    Some("<redacted:credential>")
                );
            },
        );
    }

    /// The length guard keeps bare prefixes intact.
    #[test]
    fn short_strings_are_not_redacted_by_prefix_check() {
        with_clean_env(|| {
            // `ghp_` alone must not be redacted — only long PAT-shaped
            // strings are. This matters for documentation and for the
            // redaction marker itself.
            let v = json!("ghp_");
            assert_eq!(sanitize(v).as_str(), Some("ghp_"));
        });
    }

    /// With `DEVBOY_TRACE_REDACTION=off`, `sanitize` is the identity,
    /// even for a secret-looking key holding a PAT-shaped value.
    #[test]
    fn redaction_can_be_disabled_via_env() {
        let _guard = ENV_TEST_MUTEX.lock().unwrap_or_else(|p| p.into_inner());
        temp_env::with_var("DEVBOY_TRACE_REDACTION", Some("off"), || {
            let v = json!({ "token": "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" });
            let out = sanitize(v.clone());
            assert_eq!(out, v);
        });
    }

    /// The embedded-header scanner must ignore the casing of the
    /// scheme word.
    #[test]
    fn masks_bearer_scheme_case_insensitive() {
        with_clean_env(|| {
            // HTTP schemes are case-insensitive per RFC 7235, so all of
            // these variants must redact.
            for header in [
                "Authorization: Bearer xxxxxxxxxxxxyyyyyyyyyyyy",
                "authorization: bearer xxxxxxxxxxxxyyyyyyyyyyyy",
                "AUTHORIZATION: BEARER xxxxxxxxxxxxyyyyyyyyyyyy",
                "authorization: BeArEr xxxxxxxxxxxxyyyyyyyyyyyy",
            ] {
                let out = sanitize(json!(header));
                let s = out.as_str().unwrap();
                assert!(
                    !s.contains("xxxxxxxxxxxxyyyyyyyyyyyy"),
                    "token leaked for header `{header}` → `{s}`"
                );
                assert!(
                    s.contains("<redacted"),
                    "no redaction marker for header `{header}` → `{s}`"
                );
            }
        });
    }

    /// A standalone `<scheme> <credential>` value is caught by the
    /// prefix check, in any casing of the scheme.
    #[test]
    fn masks_bare_bearer_value_case_insensitive() {
        with_clean_env(|| {
            // When the caller pasted just the `Bearer <token>` segment as
            // a standalone value, the prefix check (not the header scanner)
            // fires — must also be case-insensitive.
            for raw in [
                "Bearer abcdefghijklmnopqrstuvwx",
                "bearer abcdefghijklmnopqrstuvwx",
                "BEARER abcdefghijklmnopqrstuvwx",
                "Basic YWxpY2U6aHVudGVyMjpkcmFnb24=",
            ] {
                let out = sanitize(json!(raw));
                let s = out.as_str().unwrap();
                assert!(s.contains("<redacted"), "not redacted: `{raw}` → `{s}`");
            }
        });
    }

    /// The generic `pk_` prefix redacts long values and leaves the
    /// bare literal alone.
    #[test]
    fn masks_generic_pk_prefix() {
        with_clean_env(|| {
            // ADR-015 calls out a generic `pk_` prefix (not just
            // `pk_live_` / `pk_test_`). Enough bytes after the prefix to
            // clear the length guard so a bare `pk_` literal is left alone.
            let v = json!({ "clickup_pk": "pk_abcdefghijklmnop" });
            let out = sanitize(v);
            assert_eq!(
                out.get("clickup_pk").and_then(|v| v.as_str()),
                Some("<redacted:token-pattern>"),
                "generic pk_ prefix should redact"
            );

            // Short `pk_` literal stays untouched (e.g. in docs).
            let doc = json!("pk_");
            assert_eq!(sanitize(doc).as_str(), Some("pk_"));
        });
    }

    /// A `Redactor` keeps masking a secret captured at snapshot time
    /// after the env var itself has been removed.
    #[test]
    fn redactor_snapshot_amortizes_env_scan() {
        let _guard = ENV_TEST_MUTEX.lock().unwrap_or_else(|p| p.into_inner());
        let redactor = temp_env::with_vars(
            [
                ("DEVBOY_TRACE_REDACTION", None::<&str>),
                ("DEVBOY_REDACTOR_CACHE_TOKEN", Some("cached-token-zzzzzzzz")),
            ],
            Redactor::snapshot,
        );
        // The env var is gone at this point, but the snapshot remembers.
        let out = redactor.sanitize(json!({ "raw": "cached-token-zzzzzzzz" }));
        assert_eq!(
            out.get("raw").and_then(|v| v.as_str()),
            Some("<redacted:credential>")
        );
    }

    /// A snapshot taken while redaction is disabled yields an identity
    /// redactor.
    #[test]
    fn redactor_snapshot_respects_disable_env() {
        let _guard = ENV_TEST_MUTEX.lock().unwrap_or_else(|p| p.into_inner());
        temp_env::with_var("DEVBOY_TRACE_REDACTION", Some("off"), || {
            let redactor = Redactor::snapshot();
            let v = json!({ "token": "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" });
            assert_eq!(redactor.sanitize(v.clone()), v);
        });
    }
}
451}