Skip to main content

wallfacer_core/
redact.rs

1//! Redaction of secrets before persistence.
2//!
3//! Findings and target configuration may contain bearer tokens, cookies, or
4//! payload fields named like secrets (`api_key`, `password`, ...). Anything
5//! that lands on disk in `.wallfacer/` or in CI artefacts must first be
6//! filtered through [`Redact::redacted`] so cleartext secrets do not leak into
7//! corpus files, SARIF output, or shared logs.
8
9use std::sync::OnceLock;
10
11use regex::Regex;
12use serde_json::{Map, Value};
13
14use crate::{
15    finding::{Finding, ReproInfo},
16    target::{Target, Transport},
17};
18
/// Placeholder substituted for any value that matched a redaction pattern.
///
/// The same literal is used everywhere in this module: as a JSON string for
/// masked object entries, as the masked value of headers and environment
/// variables, and as replacement text inside free-form strings.
pub const REDACTED_PLACEHOLDER: &str = "<redacted>";
21
/// Trait implemented by types that can produce a copy with sensitive fields
/// scrubbed. Implementations must be **idempotent** and **non-destructive**:
/// they return a new value rather than mutating in place, so callers can keep
/// the original around for in-process use (e.g. reproducing a call) while
/// persisting only the scrubbed copy.
pub trait Redact {
    /// Returns a deep copy with sensitive fields replaced by
    /// [`REDACTED_PLACEHOLDER`].
    ///
    /// Takes `&self`, so the receiver is left untouched; redacting an
    /// already redacted value must yield an equal value.
    fn redacted(&self) -> Self;
}
32
33/// Returns `true` if a HTTP header name should have its value masked before
34/// persistence.
35///
36/// Matches (case-insensitive):
37/// * `Authorization`, `Proxy-Authorization`
38/// * `Cookie`, `Set-Cookie`
39/// * any name containing `token`, `secret`, `password`, `bearer`, `api-key`,
40///   `api_key`, `apikey`
41pub fn is_sensitive_header(name: &str) -> bool {
42    let lower = name.to_ascii_lowercase();
43    matches!(
44        lower.as_str(),
45        "authorization" | "proxy-authorization" | "cookie" | "set-cookie"
46    ) || contains_secret_marker(&lower)
47}
48
49/// Returns `true` if a JSON object key likely identifies a secret value.
50///
51/// The match is case-insensitive and looks for the same markers as
52/// [`is_sensitive_header`], plus standalone `auth` (only as a whole word, to
53/// avoid matching unrelated names like `author`).
54pub fn is_sensitive_key(name: &str) -> bool {
55    let lower = name.to_ascii_lowercase();
56    if contains_secret_marker(&lower) {
57        return true;
58    }
59    // Standalone `auth` word. We split on common separators so `auth_kind` or
60    // `kind-auth` match, while `author` and `authentik` do not.
61    lower
62        .split(|c: char| !c.is_ascii_alphanumeric())
63        .any(|segment| segment == "auth")
64}
65
/// Returns `true` when `lower` contains any secret-indicating substring.
///
/// This is a plain substring test with no case folding of its own; callers
/// pass a name that has already been lower-cased.
fn contains_secret_marker(lower: &str) -> bool {
    const MARKERS: [&str; 10] = [
        "token",
        "secret",
        "password",
        "passwd",
        "bearer",
        "api-key",
        "api_key",
        "apikey",
        "private-key",
        "private_key",
    ];
    for marker in MARKERS {
        if lower.contains(marker) {
            return true;
        }
    }
    false
}
81
82/// Returns a copy of `text` with secret-like substrings replaced by
83/// [`REDACTED_PLACEHOLDER`]. Patterns matched (case-insensitive):
84///
85/// * `Authorization: Bearer <token>` / `Bearer <token>` (also `Basic`).
86/// * `<sensitive-key> = <value>` and `<sensitive-key>: <value>` where
87///   `<sensitive-key>` matches the same keyword set as
88///   [`is_sensitive_key`].
89///
90/// The harness writes server output verbatim into `Finding::details` when
91/// a schema violation triggers, so secrets that show up in error
92/// messages or echoed payloads would otherwise leak into the corpus.
93/// This is a defence-in-depth pass on top of [`redact_json`]; the docs
94/// (`docs/security.md`) still describe redaction as best-effort and
95/// pattern-based.
96pub fn redact_string(text: &str) -> String {
97    let mut out = text.to_string();
98    for pattern in string_patterns() {
99        out = pattern
100            .replace_all(&out, |caps: &regex::Captures<'_>| {
101                // Capture group 1 holds the prefix to keep, group 2 (if
102                // present) the secret to mask. When only one group exists
103                // the entire match is masked.
104                if let Some(prefix) = caps.get(1) {
105                    if caps.get(2).is_some() {
106                        return format!("{}{REDACTED_PLACEHOLDER}", prefix.as_str());
107                    }
108                }
109                REDACTED_PLACEHOLDER.to_string()
110            })
111            .into_owned();
112    }
113    out
114}
115
116fn string_patterns() -> &'static [Regex] {
117    static PATTERNS: OnceLock<Vec<Regex>> = OnceLock::new();
118    PATTERNS.get_or_init(|| {
119        // Each pattern's first capture group is the literal prefix kept
120        // verbatim; the second (where present) is the secret value that
121        // gets replaced.
122        let raw: &[&str] = &[
123            // `Bearer <tok>` / `Basic <tok>` (with optional `Authorization:`).
124            r"(?i)((?:authorization\s*:\s*)?(?:bearer|basic)\s+)([A-Za-z0-9._\-+/=]{6,})",
125            // `key=value` / `key:value` / `"key":"value"` for known sensitive keys.
126            // Stops at whitespace or common delimiters so surrounding text is preserved.
127            r#"(?i)((?:^|[\s,;{(\["'])(?:authorization|api[-_]?key|apikey|access[-_]?token|refresh[-_]?token|secret|client[-_]?secret|password|passwd|bearer|private[-_]?key|token)["']?\s*[:=]\s*["']?)([^"',;\s\)\]\}]{4,})"#,
128        ];
129        raw.iter()
130            .filter_map(|src| Regex::new(src).ok())
131            .collect()
132    })
133}
134
135/// Recursively redacts a JSON value: any object entry whose key matches
136/// [`is_sensitive_key`] has its value replaced by [`REDACTED_PLACEHOLDER`].
137/// Arrays and nested objects are walked.
138pub fn redact_json(value: &Value) -> Value {
139    match value {
140        Value::Object(map) => {
141            let mut out = Map::with_capacity(map.len());
142            for (key, child) in map {
143                if is_sensitive_key(key) {
144                    out.insert(key.clone(), Value::String(REDACTED_PLACEHOLDER.to_string()));
145                } else {
146                    out.insert(key.clone(), redact_json(child));
147                }
148            }
149            Value::Object(out)
150        }
151        Value::Array(items) => Value::Array(items.iter().map(redact_json).collect()),
152        other => other.clone(),
153    }
154}
155
156impl Redact for Target {
157    fn redacted(&self) -> Self {
158        let transport = match &self.transport {
159            Transport::Stdio { command, args, env } => {
160                let env = env
161                    .iter()
162                    .map(|(name, value)| {
163                        let masked = if is_sensitive_key(name) {
164                            REDACTED_PLACEHOLDER.to_string()
165                        } else {
166                            value.clone()
167                        };
168                        (name.clone(), masked)
169                    })
170                    .collect();
171                Transport::Stdio {
172                    command: command.clone(),
173                    args: args.clone(),
174                    env,
175                }
176            }
177            Transport::Http { url, headers } => {
178                let headers = headers
179                    .iter()
180                    .map(|(name, value)| {
181                        let masked = if is_sensitive_header(name) {
182                            REDACTED_PLACEHOLDER.to_string()
183                        } else {
184                            value.clone()
185                        };
186                        (name.clone(), masked)
187                    })
188                    .collect();
189                Transport::Http {
190                    url: url.clone(),
191                    headers,
192                }
193            }
194        };
195        Self {
196            transport,
197            timeout_ms: self.timeout_ms,
198        }
199    }
200}
201
202impl Redact for ReproInfo {
203    fn redacted(&self) -> Self {
204        Self {
205            seed: self.seed,
206            tool_call: redact_json(&self.tool_call),
207            transport: self.transport.clone(),
208            composition_trail: self.composition_trail.clone(),
209        }
210    }
211}
212
213impl Redact for Finding {
214    fn redacted(&self) -> Self {
215        Self {
216            id: self.id.clone(),
217            kind: self.kind.clone(),
218            severity: self.severity,
219            tool: self.tool.clone(),
220            // `message` and `details` carry server-supplied text (error
221            // strings, echoed payload fragments). Run them through the
222            // string-level redactor so a tool that echoes credentials
223            // back doesn't leak them into corpus files. Best-effort:
224            // see `docs/security.md`.
225            message: redact_string(&self.message),
226            details: redact_string(&self.details),
227            repro: self.repro.redacted(),
228            timestamp: self.timestamp,
229        }
230    }
231}
232
/// Unit tests for the name classifiers, the JSON and string redactors, and
/// the [`Redact`] implementations.
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::collections::HashMap;

    #[test]
    fn header_authorization_is_sensitive() {
        assert!(is_sensitive_header("Authorization"));
        assert!(is_sensitive_header("authorization"));
        assert!(is_sensitive_header("AUTHORIZATION"));
    }

    #[test]
    fn header_cookie_variants_are_sensitive() {
        assert!(is_sensitive_header("Cookie"));
        assert!(is_sensitive_header("Set-Cookie"));
        assert!(is_sensitive_header("set-cookie"));
        assert!(is_sensitive_header("Proxy-Authorization"));
    }

    #[test]
    fn header_x_token_pattern_is_sensitive() {
        assert!(is_sensitive_header("X-API-Token"));
        assert!(is_sensitive_header("X-Auth-Token"));
        assert!(is_sensitive_header("x-custom-token"));
        assert!(is_sensitive_header("X-Bearer"));
    }

    #[test]
    fn header_api_key_variants_are_sensitive() {
        assert!(is_sensitive_header("X-API-Key"));
        assert!(is_sensitive_header("Api-Key"));
        assert!(is_sensitive_header("apikey"));
    }

    #[test]
    fn header_benign_is_not_sensitive() {
        assert!(!is_sensitive_header("Content-Type"));
        assert!(!is_sensitive_header("Accept"));
        assert!(!is_sensitive_header("User-Agent"));
        assert!(!is_sensitive_header("X-Request-Id"));
    }

    #[test]
    fn key_password_is_sensitive() {
        assert!(is_sensitive_key("password"));
        assert!(is_sensitive_key("passwd"));
        assert!(is_sensitive_key("user_password"));
    }

    #[test]
    fn key_secret_and_token_variants_are_sensitive() {
        assert!(is_sensitive_key("secret"));
        assert!(is_sensitive_key("clientSecret"));
        assert!(is_sensitive_key("access_token"));
        assert!(is_sensitive_key("private_key"));
    }

    #[test]
    fn key_auth_word_is_sensitive_only_as_whole_word() {
        assert!(is_sensitive_key("auth"));
        assert!(is_sensitive_key("auth_kind"));
        assert!(is_sensitive_key("kind-auth"));
        // Names that merely start with "auth" must NOT trigger.
        assert!(!is_sensitive_key("author"));
        assert!(!is_sensitive_key("authority"));
        assert!(!is_sensitive_key("authentik"));
    }

    #[test]
    fn key_benign_is_not_sensitive() {
        assert!(!is_sensitive_key("name"));
        assert!(!is_sensitive_key("id"));
        assert!(!is_sensitive_key("value"));
        assert!(!is_sensitive_key("count"));
    }

    #[test]
    fn redact_json_walks_nested_objects() {
        let input = json!({
            "user": "alice",
            "credentials": {
                "password": "p@ss",
                "api_key": "secret-123"
            },
            "items": [
                { "value": 1, "token": "t-1" },
                { "value": 2, "token": "t-2" }
            ]
        });
        let output = redact_json(&input);
        assert_eq!(output["user"], json!("alice"));
        assert_eq!(
            output["credentials"]["password"],
            json!(REDACTED_PLACEHOLDER)
        );
        assert_eq!(
            output["credentials"]["api_key"],
            json!(REDACTED_PLACEHOLDER)
        );
        assert_eq!(output["items"][0]["value"], json!(1));
        assert_eq!(output["items"][0]["token"], json!(REDACTED_PLACEHOLDER));
        assert_eq!(output["items"][1]["token"], json!(REDACTED_PLACEHOLDER));
    }

    #[test]
    fn redact_is_idempotent() {
        let input = json!({"password": "x", "api_key": "y"});
        let once = redact_json(&input);
        let twice = redact_json(&once);
        assert_eq!(once, twice);
    }

    #[test]
    fn redact_target_http_masks_authorization() {
        let mut headers = HashMap::new();
        headers.insert("Authorization".to_string(), "Bearer abc123".to_string());
        headers.insert("Content-Type".to_string(), "application/json".to_string());
        let target = Target {
            transport: Transport::Http {
                url: "http://localhost".to_string(),
                headers,
            },
            timeout_ms: 1000,
        };
        let redacted = target.redacted();
        let Transport::Http { headers, .. } = &redacted.transport else {
            panic!("expected http transport");
        };
        assert_eq!(
            headers.get("Authorization").map(String::as_str),
            Some(REDACTED_PLACEHOLDER)
        );
        assert_eq!(
            headers.get("Content-Type").map(String::as_str),
            Some("application/json")
        );
    }

    #[test]
    fn redact_target_stdio_masks_secret_env() {
        let mut env = HashMap::new();
        env.insert("API_TOKEN".to_string(), "tok-1".to_string());
        env.insert("PATH".to_string(), "/usr/bin".to_string());
        let target = Target {
            transport: Transport::Stdio {
                command: "python3".to_string(),
                args: vec!["server.py".to_string()],
                env,
            },
            timeout_ms: 1000,
        };
        let redacted = target.redacted();
        let Transport::Stdio { env, .. } = &redacted.transport else {
            panic!("expected stdio transport");
        };
        assert_eq!(
            env.get("API_TOKEN").map(String::as_str),
            Some(REDACTED_PLACEHOLDER)
        );
        assert_eq!(env.get("PATH").map(String::as_str), Some("/usr/bin"));
    }

    #[test]
    fn redact_string_masks_bearer_tokens() {
        let input = "got Authorization: Bearer abcDEF123456 from server";
        let output = redact_string(input);
        assert!(
            output.contains(REDACTED_PLACEHOLDER),
            "expected redaction in {output:?}"
        );
        assert!(!output.contains("abcDEF123456"));
    }

    #[test]
    fn redact_string_masks_kv_secrets() {
        // Each case pairs an input with the secret that must not survive.
        let cases = [
            ("error: api_key=sk-abcdef12345 not found", "sk-abcdef12345"),
            ("params: password: hunter22 expired", "hunter22"),
            (r#"{"access_token": "tok-1234"}"#, "tok-1234"),
        ];
        for (input, secret) in cases {
            let output = redact_string(input);
            assert!(
                output.contains(REDACTED_PLACEHOLDER),
                "expected redaction in {output:?}"
            );
            assert!(
                !output.contains(secret),
                "secret {secret:?} leaked in {output:?}"
            );
        }
    }

    #[test]
    fn redact_string_is_idempotent() {
        let input = "error: api_key=sk-abcdef12345 and Authorization: Bearer abcDEF123456";
        let once = redact_string(input);
        let twice = redact_string(&once);
        assert_eq!(once, twice);
    }

    #[test]
    fn redact_string_passes_through_benign_text() {
        let benign = "tool returned 5 items in 12ms";
        assert_eq!(redact_string(benign), benign);
    }

    #[test]
    fn redact_finding_masks_message_and_details() {
        use crate::finding::{Finding, FindingKind};
        let finding = Finding::new(
            FindingKind::SchemaViolation,
            "tool",
            "auth failed: api_key=sk-leaked-abc123",
            "server response: Authorization: Bearer leak-token-xyz",
            ReproInfo {
                seed: 1,
                tool_call: json!({}),
                transport: "stdio".to_string(),
                composition_trail: Vec::new(),
            },
        );
        let redacted = finding.redacted();
        assert!(redacted.message.contains(REDACTED_PLACEHOLDER));
        assert!(!redacted.message.contains("sk-leaked-abc123"));
        assert!(redacted.details.contains(REDACTED_PLACEHOLDER));
        assert!(!redacted.details.contains("leak-token-xyz"));
    }

    #[test]
    fn redact_finding_masks_repro_payload() {
        use crate::finding::{Finding, FindingKind};
        let finding = Finding::new(
            FindingKind::Crash,
            "tool",
            "msg",
            "details",
            ReproInfo {
                seed: 1,
                tool_call: json!({"password": "p", "name": "alice"}),
                transport: "stdio".to_string(),
                composition_trail: Vec::new(),
            },
        );
        let original_id = finding.id.clone();
        let redacted = finding.redacted();
        // ID is preserved (computed from the original payload).
        assert_eq!(redacted.id, original_id);
        assert_eq!(
            redacted.repro.tool_call["password"],
            json!(REDACTED_PLACEHOLDER)
        );
        assert_eq!(redacted.repro.tool_call["name"], json!("alice"));
    }
}