rtb_redact/lib.rs
1//! Free-form secret redaction for log lines, telemetry events, and
2//! diagnostic surfaces.
3//!
4//! See `docs/development/specs/2026-04-23-rtb-redact-v0.1.md` for the
5//! full design and the seven-pass rule set.
6//!
7//! ```
8//! use rtb_redact::string;
9//!
10//! let scrubbed = string("connect to postgres://app:hunter2@db/mydb");
11//! assert!(scrubbed.contains("[redacted]"));
12//! assert!(!scrubbed.contains("hunter2"));
13//! ```
14
15#![forbid(unsafe_code)]
16
17use std::borrow::Cow;
18use std::sync::LazyLock;
19
20use regex::Regex;
21
/// Header names whose values must be redacted at DEBUG / TRACE log
/// levels.
///
/// Every entry is stored in lowercase and lookups on the set are
/// case-sensitive, so callers should go through
/// [`is_sensitive_header`], which lowercases the candidate name before
/// probing. `phf::Set` keeps lookup O(1) as the list grows.
pub static SENSITIVE_HEADERS: phf::Set<&'static str> = phf::phf_set! {
    "authorization",
    "proxy-authorization",
    "cookie",
    "set-cookie",
    "x-api-key",
    "x-auth-token",
    "x-amz-security-token",
    "x-goog-api-key",
    "x-anthropic-api-key",
    "x-openai-api-key",
};
38
/// Placeholder text written in place of every redacted value.
const REDACTED: &str = "[redacted]";
40
41/// Redact secrets in `input`, returning a borrowed [`Cow`] when no
42/// redactions apply and an owned `String` otherwise.
43///
44/// The rule set is applied in the documented order (see the spec).
45/// `input` is expected to be UTF-8 (it is already typed as `&str`);
46/// callers holding `&[u8]` must convert themselves.
47#[must_use]
48pub fn string(input: &str) -> Cow<'_, str> {
49 if input.is_empty() {
50 return Cow::Borrowed(input);
51 }
52
53 // Fast path: if none of the anchor characters or keywords appear
54 // in the string, no rule can match. Avoids allocating.
55 if !fast_has_sensitive_anchor(input) {
56 return Cow::Borrowed(input);
57 }
58
59 let mut out = input.to_string();
60 apply_rules(&mut out);
61
62 // If nothing actually changed, return Borrowed so callers don't
63 // pay for the clone on false positives of the fast-path check.
64 if out == input {
65 Cow::Borrowed(input)
66 } else {
67 // SAFETY of correctness: `out` was derived from the input via
68 // regex replacements; there's no way to get back to &input
69 // from here without the Owned wrapper.
70 Cow::Owned(out)
71 }
72}
73
74/// Same as [`string`], but writes into a caller-supplied `String`.
75/// Useful for hot loops that want to reuse a buffer.
76pub fn string_into(input: &str, out: &mut String) {
77 out.clear();
78 if input.is_empty() {
79 return;
80 }
81 if !fast_has_sensitive_anchor(input) {
82 out.push_str(input);
83 return;
84 }
85 out.push_str(input);
86 apply_rules(out);
87}
88
89/// Case-insensitive membership check against [`SENSITIVE_HEADERS`].
90#[must_use]
91pub fn is_sensitive_header(name: &str) -> bool {
92 // `phf::Set` is case-sensitive. We lowercase the name into a small
93 // stack-friendly buffer; realistic header names are < 64 chars.
94 let mut buf = [0u8; 128];
95 let bytes = name.as_bytes();
96 if bytes.len() > buf.len() {
97 // Oversized header names can't be in the known-sensitive list.
98 return false;
99 }
100 for (i, &b) in bytes.iter().enumerate() {
101 buf[i] = b.to_ascii_lowercase();
102 }
103 // Safe: ASCII-lowercased ASCII stays ASCII stays UTF-8.
104 let lower = std::str::from_utf8(&buf[..bytes.len()]).unwrap_or("");
105 SENSITIVE_HEADERS.contains(lower)
106}
107
108/// Unconditionally redact a header value. Callers invoke this for
109/// any header name matching [`is_sensitive_header`].
110#[must_use]
111pub fn redact_header_value(value: &str) -> String {
112 if value.is_empty() {
113 String::new()
114 } else {
115 REDACTED.to_string()
116 }
117}
118
119// ---------------------------------------------------------------------
120// Internal: fast-path pre-check.
121// ---------------------------------------------------------------------
122
123/// Returns `true` if `input` contains any character or substring that
124/// could plausibly trigger a redaction rule. This avoids allocating
125/// and running seven regexes over clean strings.
126fn fast_has_sensitive_anchor(input: &str) -> bool {
127 // Any of these anchors could indicate a match. The check is
128 // intentionally loose — false positives here cost one allocation
129 // and seven regex runs on a small string.
130 input.contains('@') // URL userinfo
131 || input.contains('=') // query params
132 || input.contains('?')
133 || input.contains('-') // token prefixes use hyphens heavily
134 || input.contains('_')
135 || input.contains('.') // JWT dots, AWS prefixes
136 || input.contains("-----BEGIN ")
137 || has_auth_scheme(input)
138 || has_long_run(input)
139}
140
/// Reports whether `input` contains at least 40 consecutive bytes drawn
/// from the token alphabet (ASCII letters and digits plus `+ / = _ -`).
///
/// Any other byte, including every non-ASCII byte, ends the current
/// run.
fn has_long_run(input: &str) -> bool {
    const MIN_RUN: usize = 40;

    input
        .as_bytes()
        // Cut the input at every byte outside the token alphabet; what
        // remains between cuts are exactly the maximal candidate runs.
        .split(|&byte| {
            !(byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'/' | b'=' | b'_' | b'-'))
        })
        .any(|run| run.len() >= MIN_RUN)
}
163
/// Reports whether `input` mentions one of the credential scheme
/// keywords (`bearer`, `basic`, `token`), ignoring ASCII case.
///
/// Only the keyword itself is required. The auth-scheme rule accepts
/// any whitespace (`\s+`) between the keyword and the credential, so
/// insisting on a literal space here would let `Bearer\t<cred>` or
/// `Bearer\n<cred>` bypass redaction. Matching the bare keyword can
/// over-report, which only costs the caller a regex pass.
fn has_auth_scheme(input: &str) -> bool {
    const SCHEMES: [&[u8]; 3] = [b"bearer", b"basic", b"token"];

    // Plain byte scan; we're deliberately avoiding a regex here. Every
    // start offset is tried, so inputs shorter than the longest keyword
    // are handled too.
    let bytes = input.as_bytes();
    (0..bytes.len()).any(|start| {
        SCHEMES
            .iter()
            .any(|scheme| eq_ignore_ascii_case(&bytes[start..], scheme))
    })
}

/// Returns `true` if `a` starts with `b`, comparing bytes without regard
/// to ASCII case. `a` may be longer than `b`; it may not be shorter.
fn eq_ignore_ascii_case(a: &[u8], b: &[u8]) -> bool {
    a.get(..b.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(b))
}
186
187// ---------------------------------------------------------------------
188// Internal: rule application.
189// ---------------------------------------------------------------------
190
191fn apply_rules(out: &mut String) {
192 // 1. URL userinfo
193 replace_all(out, &RE_URL_USERINFO, |caps| format!("{}://{REDACTED}@", &caps[1]));
194 // 2. Authorization-header-style values.
195 replace_all(out, &RE_AUTH_SCHEME, |caps| format!("{} {REDACTED}", &caps[1]));
196 // 3. Query-parameter sensitive keys.
197 replace_all(out, &RE_QUERY_SENSITIVE, |caps| format!("{}={REDACTED}", &caps[1]));
198 // 7. PEM private key blocks — run before token rules so the
199 // key body (which can otherwise match the long-run rule) is
200 // collapsed into a single REDACTED marker.
201 replace_all(out, &RE_PEM_BLOCK, |_caps| {
202 "-----BEGIN PRIVATE KEY-----\n[redacted]\n-----END PRIVATE KEY-----".to_string()
203 });
204 // 4. Well-known credential prefixes.
205 replace_all(out, &RE_NAMED_PREFIX, |caps| {
206 let matched = &caps[0];
207 if matched.len() >= 20 {
208 REDACTED.to_string()
209 } else {
210 matched.to_string()
211 }
212 });
213 // 6. JWT-shaped tokens. (Run before the generic long-run rule so
214 // partial overlaps don't produce a half-masked JWT.) The spec
215 // requires total length >= 100 chars — short "eyJ.x.y.z"
216 // strings pass through unchanged.
217 replace_all(out, &RE_JWT, |caps| {
218 let matched = &caps[0];
219 if matched.len() >= 100 {
220 REDACTED.to_string()
221 } else {
222 matched.to_string()
223 }
224 });
225 // 5. Long opaque tokens. The captured boundary chars (leading /
226 // trailing whitespace, or start / end of input) are re-emitted
227 // verbatim so the word spacing around the redaction is preserved.
228 replace_all(out, &RE_LONG_OPAQUE, |caps| format!("{}{REDACTED}{}", &caps[1], &caps[3]));
229}
230
231fn replace_all<F>(buf: &mut String, re: &Regex, mut f: F)
232where
233 F: FnMut(®ex::Captures<'_>) -> String,
234{
235 // Build a new string if any match exists; otherwise leave `buf`
236 // untouched. We don't use `Regex::replace_all` with a closure
237 // directly because it takes a Replacer by value and we want to
238 // keep the API on `&mut String` rather than allocating a Cow.
239 if !re.is_match(buf) {
240 return;
241 }
242 let mut out = String::with_capacity(buf.len());
243 let mut last = 0;
244 for caps in re.captures_iter(buf) {
245 let whole = caps.get(0).expect("captures always include group 0");
246 out.push_str(&buf[last..whole.start()]);
247 out.push_str(&f(&caps));
248 last = whole.end();
249 }
250 out.push_str(&buf[last..]);
251 *buf = out;
252}
253
254// ---------------------------------------------------------------------
255// Internal: compiled patterns. All literal; compiled once.
256// ---------------------------------------------------------------------
257
258static RE_URL_USERINFO: LazyLock<Regex> = LazyLock::new(|| {
259 // Matches scheme://user:pass@ (userinfo). Captures the scheme
260 // so we can re-emit it.
261 Regex::new(r"([a-zA-Z][a-zA-Z0-9+.-]*)://[^:\s/?#]+:[^@\s]+@").expect("valid regex")
262});
263
264static RE_AUTH_SCHEME: LazyLock<Regex> = LazyLock::new(|| {
265 // Bearer / Basic / Token <credential>. The credential is whatever
266 // follows the scheme until whitespace or end.
267 Regex::new(r"(?i)\b(Bearer|Basic|Token)\s+[A-Za-z0-9_\-.+/=]+").expect("valid regex")
268});
269
270static RE_QUERY_SENSITIVE: LazyLock<Regex> = LazyLock::new(|| {
271 // Match `<sensitive-key>=<value>` in query strings. Value runs up
272 // to `&`, whitespace, or end. Case-insensitive key match.
273 Regex::new(
274 r"(?i)([?&]?(?:api[_-]?key|access[_-]?token|refresh[_-]?token|token|password|passwd|secret|signature|sig|auth|x[_-]?api[_-]?key))=[^&\s#]+"
275 )
276 .expect("valid regex")
277});
278
279static RE_NAMED_PREFIX: LazyLock<Regex> = LazyLock::new(|| {
280 // Well-known provider tokens. Matches the prefix + trailing
281 // allowed characters. The closure in `apply_rules` checks for
282 // total length >= 20 before redacting.
283 Regex::new(
284 r"(?x)
285 \b(
286 sk-ant-[A-Za-z0-9_\-]+
287 | sk-[A-Za-z0-9_\-]+
288 | (?:ghp|gho|ghs|ghu)_[A-Za-z0-9]+
289 | glpat-[A-Za-z0-9_\-]+
290 | AIza[A-Za-z0-9_\-]+
291 | (?:AKIA|ASIA)[A-Z0-9]+
292 | xox[baprs]-[A-Za-z0-9\-]+
293 | SG\.[A-Za-z0-9_\-]{22,}\.[A-Za-z0-9_\-]{43,}
294 )
295 ",
296 )
297 .expect("valid regex")
298});
299
300static RE_JWT: LazyLock<Regex> = LazyLock::new(|| {
301 // eyJ... . ... . ... totalling >= 100 chars.
302 Regex::new(r"eyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+").expect("valid regex")
303});
304
305static RE_LONG_OPAQUE: LazyLock<Regex> = LazyLock::new(|| {
306 // Whitespace-bounded run of 40+ base64/hex-ish chars. The boundary
307 // chars are explicit capture groups so the replacement closure can
308 // preserve the surrounding whitespace.
309 Regex::new(r"(^|\s)([A-Za-z0-9+/=_\-]{40,})(\s|$)").expect("valid regex")
310});
311
312static RE_PEM_BLOCK: LazyLock<Regex> = LazyLock::new(|| {
313 // DOTALL via (?s) so `.` crosses newlines.
314 Regex::new(r"(?s)-----BEGIN [A-Z ]*PRIVATE KEY-----.*?-----END [A-Z ]*PRIVATE KEY-----")
315 .expect("valid regex")
316});