Skip to main content

devboy_skills/trace/
redact.rs

1//! Redaction of sensitive values before traces hit disk.
2//!
3//! Two mechanisms are layered:
4//!
//! 1. Known credential shapes are masked regardless of where they
//!    appear in the JSON tree. Currently: `ghp_`, `glpat-`, `pk_`,
//!    `sk_` / `sk-`, `xoxb-` / `xoxa-` / `xoxp-` / `xapp-`, `Bearer ` /
//!    `Basic ` (case-insensitive), plus a few other common prefixes.
//!    These are all detected without knowing the configured
//!    credential set — useful when a token leaks into an error
//!    message, a git URL, or a user-supplied prompt. Provider
//!    prefixes are only recognised at the start of a string value;
//!    `Bearer ` / `Basic ` credentials are also masked when embedded
//!    inside a longer string.
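//! 2. Values of any non-empty environment variable whose name looks
//!    sensitive (e.g. `*_TOKEN` / `*_SECRET` / `*_KEY` / `*_PASSWORD` /
//!    `*_PASSPHRASE` / `*_AUTH` / `AUTHORIZATION` / `COOKIE`) are
//!    masked wherever a string equals one of them exactly — the
//!    redactor snapshots those values at call time. The same name
//!    heuristic is applied to JSON object keys: the value stored
//!    under a sensitive-looking key is replaced wholesale.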
16//!
//! Setting the `DEVBOY_TRACE_REDACTION` environment variable to `off`
//! (or `0` / `false` / `no`, case-insensitive) disables both passes
//! for local debugging. Never default to off.
19//!
20//! ## Amortizing the env snapshot
21//!
22//! The top-level [`sanitize`] helper walks `std::env::vars()` on every
23//! call — fine for one-shot CLI invocations but wasteful inside a
24//! long-running producer like [`super::SessionTracer`] that writes
25//! many events. Build a [`Redactor`] once with
26//! [`Redactor::snapshot`] and reuse it for every event in the same
27//! session to pay the env scan just once.
28
29use std::collections::HashSet;
30
31use serde_json::Value;
32
33/// Redact sensitive data in `value`. Recursively walks maps and
34/// arrays. Strings are rewritten; numbers / bools / null pass through
35/// unchanged.
36///
37/// Each call snapshots `*_TOKEN` / `*_SECRET` / … env vars afresh so
38/// that tests using `temp_env::with_var` (and production callers that
39/// legitimately mutate the environment) see up-to-date state. Inside
40/// a long session, prefer [`Redactor::snapshot`] + [`Redactor::sanitize`].
41pub fn sanitize(value: Value) -> Value {
42    Redactor::snapshot().sanitize(value)
43}
44
/// Redaction state captured once and reused across many values.
///
/// Build it with [`Redactor::snapshot`] — typically one instance per
/// long-running producer such as a `SessionTracer` — so the
/// environment is scanned a single time instead of once per event.
#[derive(Debug, Clone)]
pub struct Redactor {
    /// `false` when the opt-out env var was set at snapshot time; in
    /// that case values pass through unmodified.
    enabled: bool,
    /// Values of the sensitive-looking env vars seen at snapshot time.
    secrets: HashSet<String>,
}
54
55impl Redactor {
56    /// Capture the current set of sensitive env-var values and the
57    /// `DEVBOY_TRACE_REDACTION=off` opt-out state. Cheap to clone.
58    pub fn snapshot() -> Self {
59        if redaction_disabled() {
60            Self {
61                enabled: false,
62                secrets: HashSet::new(),
63            }
64        } else {
65            Self {
66                enabled: true,
67                secrets: known_env_secrets(),
68            }
69        }
70    }
71
72    /// Sanitize a single value using the captured env-var snapshot.
73    pub fn sanitize(&self, value: Value) -> Value {
74        if !self.enabled {
75            return value;
76        }
77        sanitize_with(&self.secrets, value)
78    }
79}
80
/// Report whether `DEVBOY_TRACE_REDACTION` requests that redaction be
/// turned off.
///
/// The value is lower-cased before comparison, so `off`, `0`, `false`
/// and `no` are accepted in any casing. An unset or unreadable
/// variable counts as "not disabled".
fn redaction_disabled() -> bool {
    const OPT_OUT: [&str; 4] = ["off", "0", "false", "no"];
    std::env::var("DEVBOY_TRACE_REDACTION")
        .map(|raw| OPT_OUT.contains(&raw.to_lowercase().as_str()))
        .unwrap_or(false)
}
87
88fn sanitize_with(secrets: &HashSet<String>, value: Value) -> Value {
89    match value {
90        Value::String(s) => Value::String(redact_string(secrets, &s)),
91        Value::Array(xs) => {
92            Value::Array(xs.into_iter().map(|x| sanitize_with(secrets, x)).collect())
93        }
94        Value::Object(map) => {
95            let mut out = serde_json::Map::with_capacity(map.len());
96            for (k, v) in map {
97                // If the key itself hints at a secret, redact the whole
98                // value regardless of its type. This prevents structured
99                // leaks like `{"authorization": {"scheme": "Bearer",
100                // "value": "…"}}` where nested field names may not
101                // themselves trip the secret-key heuristic.
102                let new_val = if key_looks_secret(&k) {
103                    Value::String("<redacted:secret-field>".to_string())
104                } else {
105                    sanitize_with(secrets, v)
106                };
107                out.insert(k, new_val);
108            }
109            Value::Object(out)
110        }
111        other => other,
112    }
113}
114
115fn redact_string(secrets: &HashSet<String>, s: &str) -> String {
116    // 1. Exact env-var match.
117    if !s.is_empty() && secrets.contains(s) {
118        return "<redacted:credential>".to_string();
119    }
120    // 2. Known token prefixes. We search case-sensitively because every
121    //    supported prefix is case-sensitive in practice.
122    if has_known_prefix(s) {
123        return "<redacted:token-pattern>".to_string();
124    }
125    // 3. Bearer / Basic schemes embedded inside a larger string. Don't
126    //    rewrite the whole string — replace only the credential segment.
127    if let Some(rewritten) = mask_auth_header_segment(s) {
128        return rewritten;
129    }
130    s.to_string()
131}
132
/// Return `true` when `s` *starts with* a recognised credential prefix
/// and is long enough to plausibly be a real credential.
///
/// Two prefix families are checked:
///
/// * provider token prefixes, matched case-sensitively so ordinary
///   words that merely share the letters (e.g. a sentence starting
///   with "Ghp") are left alone;
/// * the HTTP auth schemes `bearer ` / `basic `, matched
///   ASCII-case-insensitively because scheme tokens are
///   case-insensitive per RFC 7235.
///
/// In both cases the string must be more than 8 bytes longer than the
/// prefix, so a bare `ghp_` or `pk_` literal in documentation is not
/// treated as a token.
fn has_known_prefix(s: &str) -> bool {
    const PROVIDER_PREFIXES: &[&str] = &[
        // GitHub PATs
        "ghp_",
        "github_pat_",
        "gho_",
        "ghu_",
        "ghs_",
        "ghr_",
        // GitLab PATs
        "glpat-",
        // Generic publishable / secret key families (Stripe, ClickUp,
        // …); ADR-015 treats `pk_` / `sk_` as one generic group.
        "pk_",
        "sk_",
        // OpenAI-style keys; `sk-` also covers `sk-ant-…`.
        "sk-",
        // Slack
        "xoxb-",
        "xoxa-",
        "xoxp-",
        "xapp-",
    ];
    const AUTH_SCHEMES: &[&str] = &["bearer ", "basic "];

    // Length guard shared by both families.
    let long_enough = |prefix: &str| s.len() > prefix.len() + 8;

    let provider_hit = PROVIDER_PREFIXES
        .iter()
        .any(|&prefix| long_enough(prefix) && s.starts_with(prefix));
    if provider_hit {
        return true;
    }

    // Compare only the leading bytes, ignoring ASCII case. `get`
    // yields `None` when the cut would split a multi-byte character,
    // which cannot match an all-ASCII scheme anyway.
    AUTH_SCHEMES.iter().any(|&scheme| {
        long_enough(scheme)
            && s.get(..scheme.len())
                .map_or(false, |head| head.eq_ignore_ascii_case(scheme))
    })
}
176
/// Mask `Bearer` / `Basic` credentials embedded anywhere inside `s`,
/// e.g. `"Authorization: Bearer ghp_…"` in the middle of a log line.
///
/// Every occurrence of either scheme is examined, in the order they
/// appear. A credential runs from just after the scheme's trailing
/// space up to the next whitespace, `,` or `;` (or the end of the
/// string). Credentials of at least 8 bytes are replaced with
/// `<redacted:auth>`; shorter runs are treated as ordinary prose
/// ("bearer of bad news") and left alone, and scanning continues
/// after them.
///
/// The scheme is matched ASCII-case-insensitively (HTTP auth schemes
/// are case-insensitive, RFC 7235), but the original casing of the
/// scheme and all surrounding text is preserved in the output.
///
/// Returns `Some(rewritten)` if at least one credential was masked,
/// `None` if the string needs no change.
fn mask_auth_header_segment(s: &str) -> Option<String> {
    const SCHEMES: [&str; 2] = ["bearer ", "basic "];
    const MIN_CREDENTIAL_LEN: usize = 8;
    const MARKER: &str = "<redacted:auth>";

    // ASCII lowercasing keeps every byte offset identical to `s`, so
    // indices found in `lower` can be used to slice `s` directly.
    let lower = s.to_ascii_lowercase();

    let mut out = String::new();
    let mut copied_to = 0; // bytes of `s` already emitted into `out`
    let mut search_from = 0; // where the next scheme search starts
    let mut masked_any = false;

    // Pick the earliest scheme occurrence at or after `search_from`.
    while let Some((scheme_at, scheme_len)) = SCHEMES
        .iter()
        .filter_map(|&scheme| {
            lower[search_from..]
                .find(scheme)
                .map(|offset| (search_from + offset, scheme.len()))
        })
        .min()
    {
        let cred_start = scheme_at + scheme_len;
        let rest = &s[cred_start..];
        let cred_len = rest
            .find(|c: char| c.is_whitespace() || c == ',' || c == ';')
            .unwrap_or(rest.len());

        if cred_len >= MIN_CREDENTIAL_LEN {
            // Emit everything up to and including the scheme verbatim,
            // then the marker in place of the credential.
            out.push_str(&s[copied_to..cred_start]);
            out.push_str(MARKER);
            copied_to = cred_start + cred_len;
            search_from = copied_to;
            masked_any = true;
        } else {
            // Too short to be a credential; keep looking further on.
            // Resuming right after the scheme still lets the skipped
            // word itself start a match ("Basic bearer <token>").
            search_from = cred_start;
        }
    }

    if !masked_any {
        return None;
    }
    out.push_str(&s[copied_to..]);
    Some(out)
}
201
/// Heuristically decide whether a JSON object key or environment
/// variable name denotes a secret.
///
/// The name is upper-cased (ASCII) and `-` is treated like `_`, so
/// HTTP-header style names (`X-Api-Key`, `Set-Cookie`,
/// `Proxy-Authorization`) are judged the same way as their
/// snake-case / env-var spellings. A name is considered secret when it
///
/// * is exactly `AUTHORIZATION` or `COOKIE`,
/// * ends with `_KEY`, `_PASSPHRASE`, `_AUTH`, `_AUTHORIZATION` or
///   `_COOKIE`, or
/// * contains `PASSWORD`, `SECRET` or `TOKEN` anywhere.
///
/// The substring rule already covers bare `TOKEN` / `SECRET` /
/// `PASSWORD` and the `_TOKEN` / `_SECRET` / `_PASSWORD` suffixes, so
/// those are not listed separately.
fn key_looks_secret(key: &str) -> bool {
    const EXACT: &[&str] = &["AUTHORIZATION", "COOKIE"];
    const SUFFIXES: &[&str] = &[
        "_KEY",
        "_PASSPHRASE",
        "_AUTH",
        "_AUTHORIZATION",
        "_COOKIE",
    ];
    const FRAGMENTS: &[&str] = &["PASSWORD", "SECRET", "TOKEN"];

    // Fold case and unify separators in one pass.
    let normalized: String = key
        .chars()
        .map(|c| if c == '-' { '_' } else { c.to_ascii_uppercase() })
        .collect();
    let name = normalized.as_str();

    EXACT.contains(&name)
        || SUFFIXES.iter().any(|&suffix| name.ends_with(suffix))
        || FRAGMENTS.iter().any(|&fragment| name.contains(fragment))
}
228
229fn known_env_secrets() -> HashSet<String> {
230    let mut out = HashSet::new();
231    for (name, value) in std::env::vars() {
232        if value.is_empty() {
233            continue;
234        }
235        if key_looks_secret(&name) {
236            out.insert(value);
237        }
238    }
239    out
240}
241
/// Helpers for tests — here and in sibling modules such as
/// `trace::tests` — that depend on `DEVBOY_TRACE_REDACTION`.
///
/// All of them must go through the one mutex defined here. Otherwise a
/// `temp_env::with_var(.., "off")` in one test is visible to
/// assertions running concurrently elsewhere in the crate and
/// silently switches redaction off in the middle of that other test.
#[cfg(test)]
pub(crate) mod test_support {
    use std::sync::{Mutex, PoisonError};

    /// Crate-wide lock for tests that touch the process environment.
    ///
    /// `cargo test` runs tests concurrently, and two tests
    /// legitimately set `DEVBOY_TRACE_REDACTION=off` through
    /// `temp_env::with_var`. Without serialisation that setting leaks
    /// into unrelated tests for as long as it is held, which made
    /// `events_are_redacted_before_writing` and
    /// `masks_bare_bearer_value_case_insensitive` flaky on arm64-Linux
    /// CI. The lock exists only in test builds, so production code
    /// pays nothing for it; together with `temp_env`'s save/restore it
    /// gives the crate deterministic env state.
    pub(crate) static ENV_TEST_MUTEX: Mutex<()> = Mutex::new(());

    /// Run `f` while holding [`ENV_TEST_MUTEX`], with
    /// `DEVBOY_TRACE_REDACTION` explicitly unset for the duration.
    ///
    /// Intended for every test that expects the default (enabled)
    /// redactor. A poisoned lock is recovered rather than propagated,
    /// so one panicking test does not fail all the others.
    pub(crate) fn with_clean_env<R>(f: impl FnOnce() -> R) -> R {
        let _env_lock = ENV_TEST_MUTEX
            .lock()
            .unwrap_or_else(PoisonError::into_inner);
        temp_env::with_var("DEVBOY_TRACE_REDACTION", None::<&str>, f)
    }
}
274
/// Unit tests for the redaction rules. Every test runs under the
/// crate-wide env lock (directly or via `with_clean_env`) because the
/// redactor reads process environment variables.
#[cfg(test)]
mod tests {
    use super::test_support::{ENV_TEST_MUTEX, with_clean_env};
    use super::*;
    use serde_json::json;

    #[test]
    fn masks_github_pat() {
        with_clean_env(|| {
            // The PAT sits under a neutral key on purpose: a key such
            // as `token` would be masked by the key-name heuristic and
            // the `ghp_` prefix rule would never be exercised.
            let v = json!({ "args": { "pat": "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" } });
            let out = sanitize(v);
            let s = serde_json::to_string(&out).unwrap();
            assert!(!s.contains("ghp_aaaaaaaa"));
            assert_eq!(
                out.pointer("/args/pat").and_then(|v| v.as_str()),
                Some("<redacted:token-pattern>")
            );
        });
    }

    #[test]
    fn masks_bearer_scheme_in_header_string() {
        with_clean_env(|| {
            let v = json!("Authorization: Bearer xxxxxxxxxxxxyyyyyyyyyyyy");
            let out = sanitize(v);
            let s = out.as_str().unwrap();
            assert!(!s.contains("xxxxxxxxxxxxyyyyyyyyyyyy"), "got: {s}");
            assert!(s.contains("<redacted"), "got: {s}");
        });
    }

    #[test]
    fn masks_by_key_name_even_when_value_looks_harmless() {
        with_clean_env(|| {
            // A value that does not match any known prefix but lives under
            // a key called `password` must still be redacted.
            let v = json!({ "password": "not-a-prefix" });
            let out = sanitize(v);
            assert_eq!(
                out.get("password").and_then(|v| v.as_str()),
                Some("<redacted:secret-field>")
            );
        });
    }

    #[test]
    fn env_var_values_are_redacted_when_they_match_exactly() {
        let _guard = ENV_TEST_MUTEX.lock().unwrap_or_else(|p| p.into_inner());
        temp_env::with_vars(
            [
                ("DEVBOY_TRACE_REDACTION", None::<&str>),
                (
                    "DEVBOY_TEST_TOKEN",
                    Some("super-secret-value-nothing-matches"),
                ),
            ],
            || {
                let v = json!({ "note": "leaked: super-secret-value-nothing-matches" });
                let out = sanitize(v);
                // The exact-match secret replacement only fires when
                // the value IS the secret — not when it's embedded in
                // a larger string. Embedded leakage is a DLP problem
                // this module deliberately does not attempt to solve.
                let note = out.get("note").and_then(|v| v.as_str()).unwrap();
                assert_eq!(note, "leaked: super-secret-value-nothing-matches");

                let v = json!({ "raw": "super-secret-value-nothing-matches" });
                let out = sanitize(v);
                assert_eq!(
                    out.get("raw").and_then(|v| v.as_str()),
                    Some("<redacted:credential>")
                );
            },
        );
    }

    #[test]
    fn short_strings_are_not_redacted_by_prefix_check() {
        with_clean_env(|| {
            // `ghp_` alone must not be redacted — only long PAT-shaped
            // strings are. This matters for documentation and for the
            // redaction marker itself.
            let v = json!("ghp_");
            assert_eq!(sanitize(v).as_str(), Some("ghp_"));
        });
    }

    #[test]
    fn redaction_can_be_disabled_via_env() {
        let _guard = ENV_TEST_MUTEX.lock().unwrap_or_else(|p| p.into_inner());
        temp_env::with_var("DEVBOY_TRACE_REDACTION", Some("off"), || {
            let v = json!({ "token": "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" });
            let out = sanitize(v.clone());
            assert_eq!(out, v);
        });
    }

    #[test]
    fn masks_bearer_scheme_case_insensitive() {
        with_clean_env(|| {
            // HTTP schemes are case-insensitive per RFC 7235, so all of
            // these variants must redact.
            for header in [
                "Authorization: Bearer xxxxxxxxxxxxyyyyyyyyyyyy",
                "authorization: bearer xxxxxxxxxxxxyyyyyyyyyyyy",
                "AUTHORIZATION: BEARER xxxxxxxxxxxxyyyyyyyyyyyy",
                "authorization: BeArEr xxxxxxxxxxxxyyyyyyyyyyyy",
            ] {
                let out = sanitize(json!(header));
                let s = out.as_str().unwrap();
                assert!(
                    !s.contains("xxxxxxxxxxxxyyyyyyyyyyyy"),
                    "token leaked for header `{header}` → `{s}`"
                );
                assert!(
                    s.contains("<redacted"),
                    "no redaction marker for header `{header}` → `{s}`"
                );
            }
        });
    }

    #[test]
    fn masks_bare_bearer_value_case_insensitive() {
        with_clean_env(|| {
            // When the caller pasted just the `Bearer <token>` segment as
            // a standalone value, the prefix check (not the header scanner)
            // fires — must also be case-insensitive.
            for raw in [
                "Bearer abcdefghijklmnopqrstuvwx",
                "bearer abcdefghijklmnopqrstuvwx",
                "BEARER abcdefghijklmnopqrstuvwx",
                "Basic YWxpY2U6aHVudGVyMjpkcmFnb24=",
            ] {
                let out = sanitize(json!(raw));
                let s = out.as_str().unwrap();
                assert!(s.contains("<redacted"), "not redacted: `{raw}` → `{s}`");
            }
        });
    }

    #[test]
    fn masks_generic_pk_prefix() {
        with_clean_env(|| {
            // ADR-015 calls out a generic `pk_` prefix (not just
            // `pk_live_` / `pk_test_`). Enough bytes after the prefix to
            // clear the length guard so a bare `pk_` literal is left alone.
            let v = json!({ "clickup_pk": "pk_abcdefghijklmnop" });
            let out = sanitize(v);
            assert_eq!(
                out.get("clickup_pk").and_then(|v| v.as_str()),
                Some("<redacted:token-pattern>"),
                "generic pk_ prefix should redact"
            );

            // Short `pk_` literal stays untouched (e.g. in docs).
            let doc = json!("pk_");
            assert_eq!(sanitize(doc).as_str(), Some("pk_"));
        });
    }

    #[test]
    fn redactor_snapshot_amortizes_env_scan() {
        let _guard = ENV_TEST_MUTEX.lock().unwrap_or_else(|p| p.into_inner());
        let redactor = temp_env::with_vars(
            [
                ("DEVBOY_TRACE_REDACTION", None::<&str>),
                ("DEVBOY_REDACTOR_CACHE_TOKEN", Some("cached-token-zzzzzzzz")),
            ],
            Redactor::snapshot,
        );
        // The env var is gone at this point, but the snapshot remembers.
        let out = redactor.sanitize(json!({ "raw": "cached-token-zzzzzzzz" }));
        assert_eq!(
            out.get("raw").and_then(|v| v.as_str()),
            Some("<redacted:credential>")
        );
    }

    #[test]
    fn redactor_snapshot_respects_disable_env() {
        let _guard = ENV_TEST_MUTEX.lock().unwrap_or_else(|p| p.into_inner());
        temp_env::with_var("DEVBOY_TRACE_REDACTION", Some("off"), || {
            let redactor = Redactor::snapshot();
            let v = json!({ "token": "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" });
            assert_eq!(redactor.sanitize(v.clone()), v);
        });
    }
}