Skip to main content

difflore_core/observability/
privacy.rs

/// Marker written in place of content removed by
/// [`strip_private_tagged_regions`] and [`redact_secretish_tokens`].
pub const PRIVATE_REDACTION: &str = "[redacted private content]";

/// Placeholder written by [`redact_secrets`] wherever a secret was scrubbed.
/// Kept byte-for-byte identical to the cloud's `SECRET_REDACTION_PLACEHOLDER`
/// (`redact-secrets.ts`) so a rule that round-trips through either side reads
/// the same.
pub const SECRET_REDACTION_PLACEHOLDER: &str = "‹redacted-secret›";
8
9/// Conservative pre-persist secret redaction for locally-drafted rule text.
10///
11/// This is the Rust analogue of the cloud's `redactSecrets` in
12/// `difflore-cloud/src/lib/redact-secrets.ts`; it mirrors the SAME secret
13/// classes so a rule drafted locally is scrubbed before it is written to the
14/// SQLite skills store (and lazily embedded), exactly as the cloud scrubs
15/// before persisting/embedding a candidate. The classes, in priority order:
16///
17///   1. Provider-prefixed credentials + JWTs — redacted on shape alone
18///      (`gh[opsu]_…`, `github_pat_…`, `sk-…`, `xox[baprs]-…`, `AKIA…`,
19///      JWT `eyJ….….…`).
20///   2. `Bearer <token>` (HTTP Authorization style) — unless the token is a
21///      plain code reference.
22///   3. `<keyword> [:=] <value>` assignments for api_key / access_token /
23///      refresh_token / id_token / auth_token / bearer_token / client_secret /
24///      webhook_secret / secret / password / passwd / pwd — redacted ONLY when
25///      the value both carries secret-like entropy AND is not a code reference.
26///
27/// Conservative by design: it runs over real review prose and quoted code
28/// snippets, so a false positive silently corrupts a legitimate rule. The
29/// keyword-assignment class therefore never fires on `config.apiKey`,
30/// `process.env.API_KEY`, `getToken()`, or a plain identifier; the prefix/JWT
31/// classes fire only on their distinctive high-entropy shape. Plain prose, git
32/// SHAs, and UUIDs are left untouched (see the unit tests).
33#[must_use]
34pub fn redact_secrets(text: &str) -> String {
35    if text.is_empty() {
36        return String::new();
37    }
38    let chars: Vec<char> = text.chars().collect();
39    let mut out = String::with_capacity(text.len());
40    let mut i = 0usize;
41    while i < chars.len() {
42        if at_word_boundary(&chars, i) {
43            // 1) Provider-prefixed credential / JWT — redact on shape.
44            if let Some(end) = match_known_prefix_secret(&chars, i) {
45                out.push_str(SECRET_REDACTION_PLACEHOLDER);
46                i = end;
47                continue;
48            }
49            // 2) `Bearer <token>` — redact unless the token is a code ref.
50            if let Some((prefix_end, token_end)) = match_bearer_secret(&chars, i) {
51                let value: String = chars[prefix_end..token_end].iter().collect();
52                if !looks_like_code_reference(&value) {
53                    out.extend(chars[i..prefix_end].iter());
54                    out.push_str(SECRET_REDACTION_PLACEHOLDER);
55                    i = token_end;
56                    continue;
57                }
58            }
59            // 3) `<keyword> [:=] [quote] <token> [quote]` — redact only a
60            //    high-entropy, non-reference value.
61            if let Some(m) = match_named_secret_assign(&chars, i) {
62                let value: String = chars[m.value_start..m.value_end].iter().collect();
63                if !looks_like_code_reference(&value) && has_secret_entropy(&value) {
64                    // `chars[i..value_start]` already carries the keyword,
65                    // operator, whitespace, AND the opening quote (value_start
66                    // sits just past it). Emit that, the placeholder, then a
67                    // symmetric closing quote — mirroring the cloud's
68                    // `${prefix}${openQuote}${PLACEHOLDER}${openQuote}`. The
69                    // ORIGINAL closing quote is consumed via `match_end`.
70                    out.extend(chars[i..m.value_start].iter());
71                    out.push_str(SECRET_REDACTION_PLACEHOLDER);
72                    if let Some(q) = m.open_quote {
73                        out.push(q);
74                    }
75                    i = m.match_end;
76                    continue;
77                }
78            }
79        }
80        out.push(chars[i]);
81        i += 1;
82    }
83    out
84}
85
/// Membership test for the secret-token alphabet `[\w.~+/=-]` used by the
/// cloud regexes: ASCII letters, ASCII digits, `_`, and the punctuation
/// `.`, `~`, `+`, `/`, `=`, `-`.
const fn is_token_char(c: char) -> bool {
    matches!(
        c,
        'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.' | '~' | '+' | '/' | '=' | '-'
    )
}
92
/// Regex `\w` as the cloud patterns understand it: an ASCII letter, an ASCII
/// digit, or `_`. The `\b` boundary checks are built on this: a match may
/// only start where the preceding char is NOT a word char (start-of-string
/// counts as a boundary).
const fn is_word_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
99
100fn at_word_boundary(chars: &[char], i: usize) -> bool {
101    i == 0 || !is_word_char(chars[i - 1])
102}
103
104/// Length (in chars) of the maximal secret-token run `[\w.~+/=-]+` starting at
105/// `start`. Mirrors the cloud `SECRET_TOKEN = [\w.~+/=-]{12,}` (the `{12,}`
106/// length gate is applied by callers).
107fn secret_token_len(chars: &[char], start: usize) -> usize {
108    let mut end = start;
109    while end < chars.len() && is_token_char(chars[end]) {
110        end += 1;
111    }
112    end - start
113}
114
115/// Try to match a provider-prefixed credential or JWT at `start`, returning the
116/// end index (exclusive) on success. Each arm also enforces the trailing `\b`
117/// the cloud regex requires, so `AKIA…` embedded in a longer token is rejected.
118fn match_known_prefix_secret(chars: &[char], start: usize) -> Option<usize> {
119    // gh[opsu]_[A-Za-z0-9]{20,}
120    if let Some(&[g, h, t, u]) = chars.get(start..start + 4) {
121        if g == 'g' && h == 'h' && matches!(t, 'o' | 'p' | 's' | 'u') && u == '_' {
122            if let Some(end) = match_prefix_run(chars, start + 4, 20, |c| c.is_ascii_alphanumeric())
123            {
124                return Some(end);
125            }
126        }
127    }
128    // github_pat_[A-Za-z0-9_]{20,} (case-sensitive, like the cloud arm).
129    if starts_with_chars(chars, start, "github_pat_") {
130        if let Some(end) = match_prefix_run(chars, start + "github_pat_".len(), 20, |c| {
131            c.is_ascii_alphanumeric() || c == '_'
132        }) {
133            return Some(end);
134        }
135    }
136    // sk-[A-Za-z0-9]{20,} (case-sensitive, like the cloud arm).
137    if starts_with_chars(chars, start, "sk-") {
138        if let Some(end) = match_prefix_run(chars, start + "sk-".len(), 20, |c| {
139            c.is_ascii_alphanumeric()
140        }) {
141            return Some(end);
142        }
143    }
144    // xox[baprs]-[A-Za-z0-9-]{20,}
145    if let Some(&[x, o, x2, kind, dash]) = chars.get(start..start + 5) {
146        if x == 'x'
147            && o == 'o'
148            && x2 == 'x'
149            && matches!(kind, 'b' | 'a' | 'p' | 'r' | 's')
150            && dash == '-'
151        {
152            if let Some(end) = match_prefix_run(chars, start + 5, 20, |c| {
153                c.is_ascii_alphanumeric() || c == '-'
154            }) {
155                return Some(end);
156            }
157        }
158    }
159    // AKIA[0-9A-Z]{16} — case-sensitive prefix, then exactly 16 uppercase/digit
160    // chars and a trailing boundary.
161    if starts_with_chars(chars, start, "AKIA") {
162        let body_start = start + 4;
163        let end = body_start + 16;
164        if end <= chars.len()
165            && chars[body_start..end]
166                .iter()
167                .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit())
168            && (end >= chars.len() || !is_word_char(chars[end]))
169        {
170            return Some(end);
171        }
172    }
173    // eyJ[\w-]{10,}\.[\w-]{10,}\.[\w-]{10,} — JWT (three base64url segments).
174    if starts_with_chars(chars, start, "eyJ") {
175        if let Some(end) = match_jwt(chars, start) {
176            return Some(end);
177        }
178    }
179    None
180}
181
/// Case-sensitive check that the chars of `prefix` appear in `chars` starting
/// at index `start`. Walks both sequences in lockstep, so nothing is
/// allocated per probe. An empty `prefix` always matches.
fn starts_with_chars(chars: &[char], start: usize, prefix: &str) -> bool {
    prefix
        .chars()
        .enumerate()
        .all(|(offset, wanted)| chars.get(start + offset) == Some(&wanted))
}
192
/// ASCII case-insensitive sibling of [`starts_with_chars`], used by the
/// keyword-assignment class (the cloud regex carries the `i` flag).
fn starts_with_chars_ci(chars: &[char], start: usize, prefix: &str) -> bool {
    prefix.chars().enumerate().all(|(offset, wanted)| {
        matches!(
            chars.get(start + offset),
            Some(found) if found.eq_ignore_ascii_case(&wanted)
        )
    })
}
204
/// Match the body of a prefixed credential: a run of at least `min` chars
/// satisfying `pred`, beginning at `body_start`, followed by a trailing `\b`.
///
/// Returns the exclusive end index of the run on success, and `None` when the
/// run is shorter than `min` or is glued to a following word char.
///
/// The boundary has to be checked explicitly. Several callers pass a `pred`
/// that does not accept `_` (`gh[opsu]_…`, `sk-…`, `xox[baprs]-…`), so the run
/// can stop directly in front of an underscore. That is not a word boundary:
/// the corresponding regex (`…{20,}\b`) does not match such input, and the
/// AKIA and JWT arms reject it too.
fn match_prefix_run(
    chars: &[char],
    body_start: usize,
    min: usize,
    pred: impl Fn(char) -> bool,
) -> Option<usize> {
    let run_len = chars
        .iter()
        .skip(body_start)
        .take_while(|&&c| pred(c))
        .count();
    if run_len < min {
        return None;
    }
    let end = body_start + run_len;
    match chars.get(end) {
        // Still inside a larger identifier-like token: not a standalone secret.
        Some(next) if next.is_ascii_alphanumeric() || *next == '_' => None,
        // End of input or a non-word char: the run ends on a real boundary.
        _ => Some(end),
    }
}
223
/// Match a JWT of the shape `eyJ[\w-]{10,}\.[\w-]{10,}\.[\w-]{10,}` beginning
/// at `start`, returning the exclusive end index on success.
///
/// The `eyJ` header is verified here and is NOT counted toward the first
/// segment's `{10,}` minimum: the pattern requires ten `[\w-]` chars AFTER the
/// header, so the first segment is measured from `start + 3`.
///
/// No separate trailing-boundary check is needed. A segment consumes every
/// word char and every `-`, so the char following the last segment can never
/// be a word char.
fn match_jwt(chars: &[char], start: usize) -> Option<usize> {
    const HEADER: [char; 3] = ['e', 'y', 'J'];
    const MIN_SEGMENT_LEN: usize = 10;

    if !chars.get(start..)?.starts_with(&HEADER) {
        return None;
    }

    // End index of the `[\w-]` run starting at `from`, if it is long enough.
    let segment_end = |from: usize| -> Option<usize> {
        let len = chars
            .iter()
            .skip(from)
            .take_while(|&&c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
            .count();
        (len >= MIN_SEGMENT_LEN).then_some(from + len)
    };

    let header_end = segment_end(start + HEADER.len())?;
    if chars.get(header_end) != Some(&'.') {
        return None;
    }
    let payload_end = segment_end(header_end + 1)?;
    if chars.get(payload_end) != Some(&'.') {
        return None;
    }
    segment_end(payload_end + 1)
}
248
249/// `Bearer\s+<token>` — returns `(prefix_end, token_end)` where `prefix_end` is
250/// the index just past the whitespace (start of the token). The token is the
251/// `[\w.~+/=-]{12,}` run; trailing `\b` is implied because the run stops at the
252/// first non-token char.
253fn match_bearer_secret(chars: &[char], start: usize) -> Option<(usize, usize)> {
254    let head: String = chars
255        .get(start..(start + 6).min(chars.len()))?
256        .iter()
257        .collect();
258    if head != "Bearer" {
259        return None;
260    }
261    let mut j = start + 6;
262    let ws_start = j;
263    while j < chars.len() && chars[j].is_whitespace() {
264        j += 1;
265    }
266    if j == ws_start {
267        return None; // require at least one whitespace char (`\s+`)
268    }
269    let len = secret_token_len(chars, j);
270    if len < 12 {
271        return None;
272    }
273    Some((j, j + len))
274}
275
/// Spans produced by `match_named_secret_assign`; all indices are char
/// indices into the slice that was scanned.
struct NamedAssignMatch {
    /// Index of the first char of the value (just past any opening quote).
    value_start: usize,
    /// Exclusive end index of the value.
    value_end: usize,
    /// The opening quote char, when the value was quoted.
    open_quote: Option<char>,
    /// Exclusive end of the whole match, including a closing quote if present.
    match_end: usize,
}
282
283/// `<keyword>\s*[:=]\s*["'`]?<token>["'`]?` (case-insensitive keyword). Returns
284/// the value span, the optional opening quote (re-emitted around the
285/// placeholder so surrounding syntax survives), and the overall match end.
286fn match_named_secret_assign(chars: &[char], start: usize) -> Option<NamedAssignMatch> {
287    const KEYWORDS: &[&str] = &[
288        "api_key",
289        "apikey",
290        "api-key",
291        "access_token",
292        "accesstoken",
293        "access-token",
294        "refresh_token",
295        "refreshtoken",
296        "refresh-token",
297        "id_token",
298        "idtoken",
299        "id-token",
300        "auth_token",
301        "authtoken",
302        "auth-token",
303        "bearer_token",
304        "bearertoken",
305        "bearer-token",
306        "client_secret",
307        "clientsecret",
308        "client-secret",
309        "webhook_secret",
310        "webhooksecret",
311        "webhook-secret",
312        "secret",
313        "password",
314        "passwd",
315        "pwd",
316    ];
317    // Longest keyword first so `client_secret` wins over `secret`.
318    let kw_len = KEYWORDS
319        .iter()
320        .filter(|kw| starts_with_chars_ci(chars, start, kw))
321        .map(|kw| kw.chars().count())
322        .max()?;
323    let mut j = start + kw_len;
324    // Reject if the keyword is only a prefix of a longer identifier
325    // (`secretariat`, `passwords`): the next char must not be a word char.
326    if j < chars.len() && is_word_char(chars[j]) {
327        return None;
328    }
329    // `\s*` before the operator.
330    while j < chars.len() && chars[j].is_whitespace() {
331        j += 1;
332    }
333    if !matches!(chars.get(j), Some(':' | '=')) {
334        return None;
335    }
336    j += 1;
337    // `\s*` after the operator.
338    while j < chars.len() && chars[j].is_whitespace() {
339        j += 1;
340    }
341    let open_quote = match chars.get(j) {
342        Some(c @ ('"' | '\'' | '`')) => {
343            let q = *c;
344            j += 1;
345            Some(q)
346        }
347        _ => None,
348    };
349    let value_start = j;
350    let len = secret_token_len(chars, value_start);
351    if len < 12 {
352        return None;
353    }
354    let value_end = value_start + len;
355    let mut match_end = value_end;
356    // Optional closing quote (the cloud captures `["'`]?` but does not require
357    // it to match the opener); consume one if present.
358    if matches!(chars.get(match_end), Some('"' | '\'' | '`')) {
359        match_end += 1;
360    }
361    Some(NamedAssignMatch {
362        value_start,
363        value_end,
364        open_quote,
365        match_end,
366    })
367}
368
369/// True when a keyword-assignment / Bearer value is plainly a code reference
370/// rather than a literal secret — e.g. `config.apiKey`, `process.env.API_KEY`,
371/// `req.body.clientSecret`, `getPassword()`, or a plain word identifier. Mirrors
372/// the cloud `looksLikeCodeReference`. A high-entropy token like `A1b2C3d4E5f6`
373/// has interior digits and so fails the word-identifier arm, falling through as
374/// a secret.
375fn looks_like_code_reference(value: &str) -> bool {
376    // Call / index expression: getPassword(), tokens[0].
377    if value.contains(['(', ')', '[', ']']) {
378        return true;
379    }
380    // Dotted member access: foo.bar.baz (each segment a JS identifier).
381    if is_dotted_member_access(value) {
382        return true;
383    }
384    // Word-shaped identifier (letters/underscores/`$`, optional TRAILING
385    // digits): apiKey, API_KEY, token2. Interior digits fall through.
386    if is_word_identifier(value) {
387        return true;
388    }
389    false
390}
391
392/// `^[A-Za-z_$][\w$]*(?:\.[A-Za-z_$][\w$]*)+$` — at least one dot, each segment
393/// a JS identifier.
394fn is_dotted_member_access(value: &str) -> bool {
395    if !value.contains('.') {
396        return false;
397    }
398    let mut segments = value.split('.');
399    let mut count = 0usize;
400    for seg in &mut segments {
401        if !is_js_identifier(seg) {
402            return false;
403        }
404        count += 1;
405    }
406    count >= 2
407}
408
/// `^[A-Za-z_$][\w$]*$`: one JS identifier segment. The first char must be an
/// ASCII letter, `_` or `$`; later chars may additionally be ASCII digits.
fn is_js_identifier(seg: &str) -> bool {
    let is_start = |c: char| c.is_ascii_alphabetic() || matches!(c, '_' | '$');
    let is_continue = |c: char| c.is_ascii_alphanumeric() || matches!(c, '_' | '$');

    let mut rest = seg.chars();
    rest.next().map_or(false, is_start) && rest.all(is_continue)
}
418
/// `^[A-Za-z_$][A-Za-z_$]*\d*$`: a non-empty stem of ASCII letters, `_` and
/// `$`, optionally followed by digits at the very END. `apiKey`, `API_KEY`
/// and `token2` qualify; anything with a digit in the middle (`A1b2`) does
/// not.
fn is_word_identifier(value: &str) -> bool {
    // Strip the optional trailing digits; what is left must be a pure
    // letter/underscore/`$` stem.
    let stem = value.trim_end_matches(|c: char| c.is_ascii_digit());
    !stem.is_empty()
        && stem
            .chars()
            .all(|c| c.is_ascii_alphabetic() || matches!(c, '_' | '$'))
}
441
/// Heuristic for "this value looks like a real secret". Counterpart of the
/// cloud's `hasSecretEntropy`. It holds when the value
///   * mixes ASCII letters with ASCII digits, or
///   * contains base64 punctuation (`+`, `/`, `=`) and is at least 16 chars
///     long, or
///   * is at least 40 chars long.
///
/// Plain words and short references fail every test, so `password = secret`
/// is never redacted.
fn has_secret_entropy(value: &str) -> bool {
    let mut saw_letter = false;
    let mut saw_digit = false;
    let mut saw_base64_punct = false;
    let mut char_count = 0usize;

    for c in value.chars() {
        char_count += 1;
        match c {
            'a'..='z' | 'A'..='Z' => saw_letter = true,
            '0'..='9' => saw_digit = true,
            '+' | '/' | '=' => saw_base64_punct = true,
            _ => {}
        }
    }

    (saw_letter && saw_digit) || (saw_base64_punct && char_count >= 16) || char_count >= 40
}
459
/// `(opening tag, closing tag)` pairs that delimit a private region. The tags
/// are written in lowercase because they are searched for in a lowercased
/// copy of the input.
const PRIVATE_TAG_PAIRS: &[(&str, &str)] = &[
    ("<private>", "</private>"),
    ("<secret>", "</secret>"),
    ("<sensitive>", "</sensitive>"),
];
465
466pub fn strip_private_tagged_regions(input: &str) -> String {
467    let lower = input.to_ascii_lowercase();
468    let mut out = String::with_capacity(input.len());
469    let mut cursor = 0;
470
471    while let Some((start, open, close)) = next_private_open_tag(&lower, cursor) {
472        out.push_str(&input[cursor..start]);
473        out.push_str(PRIVATE_REDACTION);
474
475        let content_start = start + open.len();
476        cursor = match lower[content_start..].find(close) {
477            Some(rel_end) => content_start + rel_end + close.len(),
478            None => input.len(),
479        };
480    }
481
482    out.push_str(&input[cursor..]);
483    out
484}
485
486pub fn redact_secretish_tokens(input: &str) -> String {
487    let mut out = String::with_capacity(input.len());
488    let mut token = String::new();
489
490    for ch in input.chars() {
491        if ch.is_whitespace() {
492            push_redacted_token(&mut out, &token);
493            token.clear();
494            out.push(ch);
495        } else {
496            token.push(ch);
497        }
498    }
499    push_redacted_token(&mut out, &token);
500    out
501}
502
503fn push_redacted_token(out: &mut String, token: &str) {
504    if token.is_empty() {
505        return;
506    }
507    let trimmed = token.trim_matches(|c: char| {
508        matches!(
509            c,
510            '"' | '\'' | '`' | ',' | ';' | ':' | ')' | '(' | ']' | '[' | '{' | '}'
511        )
512    });
513    if looks_secretish(trimmed) {
514        let prefix_len = token.find(trimmed).unwrap_or(0);
515        let suffix_start = prefix_len + trimmed.len();
516        out.push_str(&token[..prefix_len]);
517        if let Some((key, _)) = trimmed.split_once('=') {
518            out.push_str(key);
519            out.push('=');
520        }
521        out.push_str(PRIVATE_REDACTION);
522        out.push_str(&token[suffix_start..]);
523    } else {
524        out.push_str(token);
525    }
526}
527
/// Shape test for a raw credential, applied to the part after the first `=`
/// when the token is a `key=value` pair and to the whole token otherwise.
///
/// Recognised shapes:
///   * `sk-…` of at least 16 bytes (prefix compared case-insensitively);
///   * `ghp_` / `gho_` / `ghu_` / `ghs_` / `github_pat_` of at least 20 bytes
///     (prefix compared case-insensitively);
///   * `AKIA…` of at least 20 bytes consisting solely of ASCII uppercase
///     letters and digits (case-sensitive, surrounding whitespace ignored).
fn looks_secretish(token: &str) -> bool {
    const GITHUB_PREFIXES: [&str; 5] = ["ghp_", "gho_", "ghu_", "ghs_", "github_pat_"];

    let candidate = token.split_once('=').map_or(token, |(_, rhs)| rhs);
    let folded = candidate.to_ascii_lowercase();

    if folded.starts_with("sk-") && folded.len() >= 16 {
        return true;
    }
    if GITHUB_PREFIXES
        .iter()
        .any(|prefix| folded.starts_with(prefix))
    {
        return folded.len() >= 20;
    }

    let aws_candidate = candidate.trim();
    aws_candidate.len() >= 20
        && aws_candidate.starts_with("AKIA")
        && aws_candidate
            .chars()
            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit())
}
554
555fn next_private_open_tag(lower: &str, cursor: usize) -> Option<(usize, &str, &str)> {
556    PRIVATE_TAG_PAIRS
557        .iter()
558        .filter_map(|(open, close)| {
559            lower[cursor..]
560                .find(open)
561                .map(|rel| (cursor + rel, *open, *close))
562        })
563        .min_by_key(|(start, _, _)| *start)
564}
565
#[cfg(test)]
mod tests {
    use super::*;

    // ── strip_private_tagged_regions ─────────────────────────────────────────

    #[test]
    fn strip_private_tagged_regions_redacts_known_tags() {
        let out = strip_private_tagged_regions(
            "keep <private>token=abc</private> and <secret>sk-123</secret>",
        );

        assert_eq!(
            out,
            "keep [redacted private content] and [redacted private content]"
        );
        for leaked in ["token=abc", "sk-123"] {
            assert!(!out.contains(leaked));
        }
    }

    #[test]
    fn strip_private_tagged_regions_is_case_insensitive() {
        assert_eq!(
            strip_private_tagged_regions("a <Sensitive>customer</SENSITIVE> b"),
            "a [redacted private content] b"
        );
    }

    #[test]
    fn strip_private_tagged_regions_redacts_unclosed_tag_to_end() {
        assert_eq!(
            strip_private_tagged_regions("safe <private>do not store"),
            "safe [redacted private content]"
        );
    }

    // ── redact_secretish_tokens ──────────────────────────────────────────────

    #[test]
    fn redact_secretish_tokens_redacts_common_raw_tokens() {
        let input = "openai=sk-proj-abcdefghijklmnopqrstuvwxyz ghp_abcdefghijklmnopqrstuvwxyz AKIAABCDEFGHIJKLMNOP";

        assert_eq!(
            redact_secretish_tokens(input),
            "openai=[redacted private content] [redacted private content] [redacted private content]"
        );
    }

    #[test]
    fn redact_secretish_tokens_keeps_short_false_positives() {
        let input = "use sk-test in docs and ticket ghp_short";

        assert_eq!(redact_secretish_tokens(input), input);
    }

    // ── redact_secrets: one assertion per secret class, plus guards ──────────

    const M: &str = SECRET_REDACTION_PLACEHOLDER;

    /// The input must come back scrubbed: the placeholder is present and the
    /// original secret no longer appears anywhere in the output.
    fn assert_redacted(input: &str, secret: &str) {
        let out = redact_secrets(input);
        assert!(out.contains(M), "expected redaction in {out:?}");
        assert!(
            !out.contains(secret),
            "secret {secret:?} leaked through: {out:?}"
        );
    }

    /// The input must come back byte-for-byte (no false positive).
    fn assert_untouched(input: &str) {
        let out = redact_secrets(input);
        assert_eq!(out, input, "false-positive redaction");
        assert!(!out.contains(M), "false-positive redaction: {out:?}");
    }

    #[test]
    fn redacts_github_token_classes() {
        // gh[opsu]_ OAuth / PAT / app / refresh tokens.
        let classic = [
            "ghp_abcdefghijklmnopqrstuvwxyz0123",
            "gho_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123",
            "ghu_0123456789abcdefghijklmnopqrst",
            "ghs_abcdefghijklmnopqrstuvwxyzABCD",
        ];
        classic
            .iter()
            .for_each(|tok| assert_redacted(&format!("token is {tok} here"), tok));

        // github_pat_ fine-grained PAT.
        let pat = "github_pat_11ABCDE0123456789abcdefABCDEF";
        assert_redacted(&format!("see {pat} end"), pat);
    }

    #[test]
    fn redacts_openai_style_sk_key() {
        assert_redacted(
            "key=sk-abcdefghijklmnopqrstuvwxyz1234",
            "sk-abcdefghijklmnopqrstuvwxyz1234",
        );
    }

    #[test]
    fn redacts_slack_xox_token() {
        // Synthetic fixture, not a real token: it satisfies the redaction
        // pattern (`xox[baprs]-[A-Za-z0-9-]{20,}`) without resembling a real
        // Slack token, so secret scanners don't flag this file.
        assert_redacted(
            "slack xoxb-EXAMPLEONLY-NOTAREALTOKEN-PLACEHOLDER token",
            "xoxb-EXAMPLEONLY-NOTAREALTOKEN-PLACEHOLDER",
        );
    }

    #[test]
    fn redacts_aws_akia_key() {
        // AKIA + exactly 16 uppercase/digit chars.
        assert_redacted("aws id AKIAIOSFODNN7EXAMPLE here", "AKIAIOSFODNN7EXAMPLE");
    }

    #[test]
    fn redacts_jwt_eyj_token() {
        let jwt = concat!(
            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.",
            "eyJzdWIiOiIxMjM0NTY3ODkwIn0.",
            "dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U",
        );
        assert_redacted(&format!("jwt {jwt} end"), jwt);
    }

    #[test]
    fn redacts_bearer_token() {
        // The `Bearer ` prefix survives; only the token itself is scrubbed.
        assert_eq!(
            redact_secrets("Authorization: Bearer abcdef1234567890XYZ"),
            format!("Authorization: Bearer {M}")
        );
    }

    #[test]
    fn redacts_named_secret_assignments_preserving_quotes() {
        // Quoted high-entropy value: the quotes stay around the placeholder.
        assert_eq!(
            redact_secrets(r#"api_key = "A1b2C3d4E5f6G7h8""#),
            format!(r#"api_key = "{M}""#)
        );

        // Letter+digit mixes behind the other keyword families. The last
        // entry is a long opaque base64-ish value with no digits that still
        // trips the entropy check.
        let cases = [
            ("access_token: Zx9Yw8Vu7Ts6Rq5Po4", "Zx9Yw8Vu7Ts6Rq5Po4"),
            ("client_secret='Q1w2E3r4T5y6U7i8'", "Q1w2E3r4T5y6U7i8"),
            ("password=Hunter2Hunter2Hunter2", "Hunter2Hunter2Hunter2"),
            (
                "webhook_secret = AbCdEfGhIjKlMnOpQr/StUvWxYz+aBcDeFgHiJkLmNo",
                "AbCdEfGhIjKlMnOpQr/StUvWxYz+aBcDeFgHiJkLmNo",
            ),
        ];
        for (input, secret) in cases {
            assert_redacted(input, secret);
        }
    }

    #[test]
    fn guard_code_reference_value_is_not_redacted() {
        let references = [
            // The canonical false positive: assigning from a config object.
            "const apiKey = config.apiKey",
            "token = process.env.API_KEY",
            "const secret = req.body.clientSecret",
            // Call / index expressions are code references too.
            "password = getPassword()",
            // A plain identifier value (no interior digits) is a reference.
            "api_key = apiKeyVariable",
            // `Bearer <identifier>` is a code reference, not a literal token.
            "Bearer authorizationToken",
        ];
        for input in references {
            assert_untouched(input);
        }
    }

    #[test]
    fn guard_low_entropy_assignment_is_not_redacted() {
        // Plain word values: no letter+digit mix, no base64, not long → kept.
        for input in ["password = secret", "secret: changeme"] {
            assert_untouched(input);
        }
    }

    #[test]
    fn guard_git_sha_is_not_redacted() {
        // A 40-char hex commit sha has neither keyword nor prefix → intact.
        assert_untouched("fixed in commit a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0");
    }

    #[test]
    fn guard_uuid_is_not_redacted() {
        assert_untouched("run id 550e8400-e29b-41d4-a716-446655440000 completed");
    }

    #[test]
    fn guard_normal_prose_is_not_redacted() {
        for input in [
            "Please validate the request body before returning a 413 status.",
            "Add a regression test that asserts the panic is no longer reachable.",
        ] {
            assert_untouched(input);
        }
    }

    #[test]
    fn guard_keyword_substring_of_identifier_is_not_redacted() {
        // `secret` is only the head of `secretariat`, so the keyword class
        // must not fire (no `\s*[:=]` follows the keyword boundary).
        assert_untouched("the secretariat: A1b2C3d4E5f6 reviewed it");
    }

    #[test]
    fn redacts_only_the_secret_inside_surrounding_prose() {
        let out = redact_secrets(
            "Reviewer pasted ghp_abcdefghijklmnopqrstuvwxyz0123 into the PR — rotate it.",
        );
        assert_eq!(out, format!("Reviewer pasted {M} into the PR — rotate it."));
    }
}