Skip to main content

hematite/agent/
edge_redact.rs

1// Edge Redaction — privacy-preserving filter for MCP server mode.
2//
3// Runs after inspect_host() and before the response crosses the wire to any
4// cloud agent. Strips identifiers that should never leave the machine:
5//   - Usernames embedded in file paths
6//   - MAC addresses
7//   - Hardware serial numbers
8//   - Hostnames / computer names
9//   - Credential-shaped values (API keys, tokens, passwords)
10//   - AWS access key IDs
11//
12// Each category is tracked separately so the cloud model receives a clear
13// redaction receipt explaining what was sanitized and how much, without
14// revealing the original values.
15//
16// Enable: hematite --mcp-server --edge-redact
17
18use lazy_static::lazy_static;
19use regex::Regex;
20use std::collections::BTreeMap;
21use std::fmt::Write as _;
22
/// Outcome of a single `redact` pass: the sanitized text plus a receipt
/// describing how much was replaced, without exposing the original values.
///
/// Derives `Debug`, `Clone`, `PartialEq`/`Eq` and `Default` so results can be
/// logged, compared in tests, and built incrementally by callers.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RedactResult {
    /// Sanitized text safe to send to a cloud model.
    pub text: String,
    /// Total number of individual substitutions made
    /// (as produced by `redact`, the sum of all `tier1_hits` values).
    pub redaction_count: usize,
    /// Human-readable one-line summary; `apply` prepends it to the text.
    pub summary_header: String,
    /// Per-category hit counts keyed by pattern label — used by the audit trail.
    pub tier1_hits: BTreeMap<&'static str, usize>,
}
33
34struct Pattern {
35    re: Regex,
36    label: &'static str,
37    replacement: &'static str,
38}
39
40lazy_static! {
41    static ref PATTERNS: Vec<Pattern> = vec![
42        // Windows username in paths: C:\Users\<name>\ or C:/Users/<name>/
43        // Trailing separator is optional so bare paths like C:\Users\ocean are also caught.
44        Pattern {
45            re: Regex::new(r"(?i)(C:[/\\]Users[/\\])([^/\\\r\n\t ]+)([/\\]?)").unwrap(),
46            label: "username-path",
47            replacement: "${1}[USER]${3}",
48        },
49        // Linux/macOS home paths: /home/<name>/ or /Users/<name>/
50        // Trailing slash is optional so paths at end-of-line are also caught.
51        Pattern {
52            re: Regex::new(r"(/(?:home|Users)/)([^/\r\n\t ]+)(/?)").unwrap(),
53            label: "username-path",
54            replacement: "${1}[USER]${3}",
55        },
56        // MAC addresses (colon or hyphen-separated)
57        Pattern {
58            re: Regex::new(r"\b([0-9A-Fa-f]{2}[:\-]){5}[0-9A-Fa-f]{2}\b").unwrap(),
59            label: "mac-address",
60            replacement: "[MAC]",
61        },
62        // Hardware / disk serial numbers
63        Pattern {
64            re: Regex::new(r"(?i)(serial\s*(?:number)?[:=]\s*)([^\s\r\n]{4,})").unwrap(),
65            label: "serial-number",
66            replacement: "${1}[SERIAL]",
67        },
68        // Computer / hostname labels
69        Pattern {
70            re: Regex::new(
71                r"(?i)((?:hostname|computer\s*name|machine\s*name|device\s*name|netbios\s*name)\s*[:=]\s*)([^\s\r\n]+)"
72            ).unwrap(),
73            label: "hostname",
74            replacement: "${1}[HOSTNAME]",
75        },
76        // AWS access key IDs
77        Pattern {
78            re: Regex::new(r"\bAKIA[0-9A-Z]{16}\b").unwrap(),
79            label: "aws-key",
80            replacement: "[AWS-KEY]",
81        },
82        // Generic credential values: KEY=xxx, TOKEN=xxx, PASSWORD=xxx, etc.
83        // Only fires when the label looks credential-shaped and value is ≥8 chars.
84        Pattern {
85            re: Regex::new(
86                r"(?i)((?:api[_\-]?key|secret[_\-]?key|access[_\-]?token|auth[_\-]?token|password|passwd|pwd|private[_\-]?key|client[_\-]?secret)[^\s=:]*\s*[:=]\s*)(\S{8,})"
87            ).unwrap(),
88            label: "credential",
89            replacement: "${1}[REDACTED]",
90        },
91    ];
92}
93
94/// Apply all redaction patterns to `input`.
95/// Returns the sanitized text plus a receipt of what was removed.
96pub fn redact(input: &str) -> RedactResult {
97    let mut text = input.to_string();
98    let mut counts: BTreeMap<&'static str, usize> = BTreeMap::new();
99
100    for pattern in PATTERNS.iter() {
101        let hits = pattern.re.find_iter(&text).count();
102        if hits > 0 {
103            *counts.entry(pattern.label).or_insert(0) += hits;
104            text = pattern
105                .re
106                .replace_all(&text, pattern.replacement)
107                .into_owned();
108        }
109    }
110
111    let total: usize = counts.values().sum();
112
113    let summary_header = if total == 0 {
114        String::from("[edge-redact: no sensitive patterns detected]")
115    } else {
116        let mut detail = String::with_capacity(counts.len() * 20);
117        for (i, (label, n)) in counts.iter().enumerate() {
118            if i > 0 {
119                detail.push_str(", ");
120            }
121            detail.push_str(label);
122            let _ = write!(detail, " \u{00d7}{n}");
123        }
124        format!(
125            "[edge-redact: {total} substitution(s) — {detail} — values replaced before leaving this machine]"
126        )
127    };
128
129    RedactResult {
130        text,
131        redaction_count: total,
132        summary_header,
133        tier1_hits: counts,
134    }
135}
136
137/// Wrap a tool result with the edge-redact header so the cloud model
138/// always sees a clear privacy receipt at the top of the response.
139pub fn apply(raw: &str) -> String {
140    let result = redact(raw);
141    format!("{}\n\n{}", result.summary_header, result.text)
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `redact` on `input` and checks that `placeholder` is present,
    /// `secret` is gone, and at least one substitution was recorded.
    /// Returns the result for any further assertions.
    fn assert_scrubbed(input: &str, placeholder: &str, secret: &str) -> RedactResult {
        let r = redact(input);
        assert!(
            r.text.contains(placeholder),
            "expected {placeholder} in {:?}",
            r.text
        );
        assert!(
            !r.text.contains(secret),
            "raw value {secret:?} must not appear in {:?}",
            r.text
        );
        assert!(r.redaction_count > 0, "a substitution must be counted");
        r
    }

    #[test]
    fn redacts_windows_username_path() {
        assert_scrubbed(
            "path: C:\\Users\\johndoe\\Documents\\project",
            "[USER]",
            "johndoe",
        );
    }

    #[test]
    fn redacts_bare_windows_profile_path() {
        // No trailing separator after the username.
        let r = assert_scrubbed("profile: C:\\Users\\ocean", "[USER]", "ocean");
        assert_eq!(r.text, "profile: C:\\Users\\[USER]");
    }

    #[test]
    fn redacts_unix_home_paths() {
        let linux = assert_scrubbed("cwd: /home/alice/projects/app", "[USER]", "alice");
        assert_eq!(linux.text, "cwd: /home/[USER]/projects/app");

        let mac = assert_scrubbed("cwd: /Users/alice/Library", "[USER]", "alice");
        assert_eq!(mac.text, "cwd: /Users/[USER]/Library");
    }

    #[test]
    fn redacts_mac_address() {
        assert_scrubbed(
            "MAC: 00:1A:2B:3C:4D:5E adapter connected",
            "[MAC]",
            "00:1A:2B:3C:4D:5E",
        );
    }

    #[test]
    fn redacts_hyphenated_mac_address() {
        assert_scrubbed(
            "Physical Address 00-1A-2B-3C-4D-5E",
            "[MAC]",
            "00-1A-2B-3C-4D-5E",
        );
    }

    #[test]
    fn redacts_serial_number() {
        assert_scrubbed("SerialNumber: WD-WX12345678", "[SERIAL]", "WD-WX12345678");
    }

    #[test]
    fn redacts_hostname_label() {
        assert_scrubbed(
            "ComputerName: CORP-LAPTOP-007",
            "[HOSTNAME]",
            "CORP-LAPTOP-007",
        );
    }

    #[test]
    fn redacts_aws_key() {
        assert_scrubbed(
            "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE found in env",
            "[AWS-KEY]",
            "AKIAIOSFODNN7EXAMPLE",
        );
    }

    #[test]
    fn redacts_credential_value() {
        assert_scrubbed(
            "API_KEY=sk-supersecretvalue123 exported",
            "[REDACTED]",
            "sk-supersecretvalue123",
        );
    }

    #[test]
    fn receipt_reports_per_category_counts() {
        let r = redact("MAC: 00:1A:2B:3C:4D:5E and 66:77:88:99:AA:BB");
        assert_eq!(r.redaction_count, 2);
        assert_eq!(r.tier1_hits.get("mac-address"), Some(&2));
        assert!(
            r.summary_header.contains("mac-address \u{00d7}2"),
            "header should list the category and its count: {:?}",
            r.summary_header
        );
    }

    #[test]
    fn clean_input_passes_through_unchanged() {
        let input = "Processes: 42 running\nCPU: 12%\nRAM: 8.1 GB / 32 GB";
        let r = redact(input);
        assert_eq!(r.redaction_count, 0);
        assert_eq!(r.text, input);
        assert!(r.summary_header.contains("no sensitive patterns"));
    }

    #[test]
    fn apply_always_prepends_header() {
        let out = apply("CPU: 15%");
        assert!(out.starts_with("[edge-redact:"), "header must be first");
    }

    #[test]
    fn apply_separates_header_from_redacted_body() {
        let out = apply("hostname: box1");
        assert!(out.starts_with("[edge-redact: 1 substitution(s)"));
        assert!(out.ends_with("\n\nhostname: [HOSTNAME]"));
        assert!(!out.contains("box1"), "raw hostname must not appear");
    }
}