hematite/agent/
edge_redact.rs1use lazy_static::lazy_static;
19use regex::Regex;
20use std::collections::BTreeMap;
21
/// Outcome of a single [`redact`] call.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log, copy and
/// compare results (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RedactResult {
    /// The input text after every pattern's replacement has been applied.
    pub text: String,
    /// Total number of hits recorded across all labels (the sum of the
    /// values in `tier1_hits`).
    pub redaction_count: usize,
    /// One-line, bracketed `[edge-redact: ...]` summary describing what was
    /// replaced, or stating that nothing was detected.
    pub summary_header: String,
    /// Hit count per pattern label, ordered by label.
    pub tier1_hits: BTreeMap<&'static str, usize>,
}
32
33struct Pattern {
34 re: Regex,
35 label: &'static str,
36 replacement: &'static str,
37}
38
39lazy_static! {
40 static ref PATTERNS: Vec<Pattern> = vec![
41 Pattern {
43 re: Regex::new(r"(?i)(C:[/\\]Users[/\\])([^/\\\r\n\t ]+)([/\\])").unwrap(),
44 label: "username-path",
45 replacement: "${1}[USER]${3}",
46 },
47 Pattern {
49 re: Regex::new(r"(/(?:home|Users)/)([^/\r\n\t ]+)(/)").unwrap(),
50 label: "username-path",
51 replacement: "${1}[USER]${3}",
52 },
53 Pattern {
55 re: Regex::new(r"\b([0-9A-Fa-f]{2}[:\-]){5}[0-9A-Fa-f]{2}\b").unwrap(),
56 label: "mac-address",
57 replacement: "[MAC]",
58 },
59 Pattern {
61 re: Regex::new(r"(?i)(serial\s*(?:number)?[:=]\s*)([^\s\r\n]{4,})").unwrap(),
62 label: "serial-number",
63 replacement: "${1}[SERIAL]",
64 },
65 Pattern {
67 re: Regex::new(
68 r"(?i)((?:hostname|computer\s*name|machine\s*name|device\s*name|netbios\s*name)\s*[:=]\s*)([^\s\r\n]+)"
69 ).unwrap(),
70 label: "hostname",
71 replacement: "${1}[HOSTNAME]",
72 },
73 Pattern {
75 re: Regex::new(r"\bAKIA[0-9A-Z]{16}\b").unwrap(),
76 label: "aws-key",
77 replacement: "[AWS-KEY]",
78 },
79 Pattern {
82 re: Regex::new(
83 r"(?i)((?:api[_\-]?key|secret[_\-]?key|access[_\-]?token|auth[_\-]?token|password|passwd|pwd|private[_\-]?key|client[_\-]?secret)[^\s=:]*\s*[:=]\s*)(\S{8,})"
84 ).unwrap(),
85 label: "credential",
86 replacement: "${1}[REDACTED]",
87 },
88 ];
89}
90
91pub fn redact(input: &str) -> RedactResult {
94 let mut text = input.to_string();
95 let mut counts: BTreeMap<&'static str, usize> = BTreeMap::new();
96
97 for pattern in PATTERNS.iter() {
98 let hits = pattern.re.find_iter(&text).count();
99 if hits > 0 {
100 *counts.entry(pattern.label).or_insert(0) += hits;
101 text = pattern
102 .re
103 .replace_all(&text, pattern.replacement)
104 .into_owned();
105 }
106 }
107
108 let total: usize = counts.values().sum();
109
110 let summary_header = if total == 0 {
111 String::from("[edge-redact: no sensitive patterns detected]")
112 } else {
113 let detail: Vec<String> = counts
114 .iter()
115 .map(|(label, n)| format!("{label} \u{00d7}{n}"))
116 .collect();
117 format!(
118 "[edge-redact: {total} substitution(s) — {} — values replaced before leaving this machine]",
119 detail.join(", ")
120 )
121 };
122
123 RedactResult {
124 text,
125 redaction_count: total,
126 summary_header,
127 tier1_hits: counts,
128 }
129}
130
131pub fn apply(raw: &str) -> String {
134 let result = redact(raw);
135 format!("{}\n\n{}", result.summary_header, result.text)
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// The account name inside a Windows profile path is replaced.
    #[test]
    fn redacts_windows_username_path() {
        let outcome = redact("path: C:\\Users\\johndoe\\Documents\\project");
        assert!(outcome.text.contains("[USER]"), "should redact username");
        assert!(
            !outcome.text.contains("johndoe"),
            "should not contain raw username"
        );
        assert!(outcome.redaction_count > 0);
    }

    /// A colon-separated MAC address is replaced.
    #[test]
    fn redacts_mac_address() {
        let scrubbed = redact("MAC: 00:1A:2B:3C:4D:5E adapter connected").text;
        assert!(scrubbed.contains("[MAC]"), "should redact MAC");
        assert!(
            !scrubbed.contains("00:1A:2B:3C:4D:5E"),
            "raw MAC must not appear"
        );
    }

    /// The value after a `SerialNumber:` label is replaced.
    #[test]
    fn redacts_serial_number() {
        let scrubbed = redact("SerialNumber: WD-WX12345678").text;
        assert!(scrubbed.contains("[SERIAL]"), "should redact serial");
        assert!(
            !scrubbed.contains("WD-WX12345678"),
            "raw serial must not appear"
        );
    }

    /// The value after a `ComputerName:` label is replaced.
    #[test]
    fn redacts_hostname_label() {
        let scrubbed = redact("ComputerName: CORP-LAPTOP-007").text;
        assert!(scrubbed.contains("[HOSTNAME]"), "should redact hostname");
        assert!(
            !scrubbed.contains("CORP-LAPTOP-007"),
            "raw hostname must not appear"
        );
    }

    /// An `AKIA...` access key id is replaced.
    #[test]
    fn redacts_aws_key() {
        let scrubbed = redact("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE found in env").text;
        assert!(scrubbed.contains("[AWS-KEY]"), "should redact AWS key");
        assert!(
            !scrubbed.contains("AKIAIOSFODNN7EXAMPLE"),
            "raw key must not appear"
        );
    }

    /// The value assigned to a credential-style key is replaced.
    #[test]
    fn redacts_credential_value() {
        let scrubbed = redact("API_KEY=sk-supersecretvalue123 exported").text;
        assert!(
            scrubbed.contains("[REDACTED]"),
            "should redact credential value"
        );
        assert!(
            !scrubbed.contains("sk-supersecretvalue123"),
            "raw secret must not appear"
        );
    }

    /// Text with nothing sensitive comes back untouched with a zero count.
    #[test]
    fn clean_input_passes_through_unchanged() {
        let sample = "Processes: 42 running\nCPU: 12%\nRAM: 8.1 GB / 32 GB";
        let outcome = redact(sample);
        assert_eq!(outcome.redaction_count, 0);
        assert_eq!(outcome.text, sample);
        assert!(outcome.summary_header.contains("no sensitive patterns"));
    }

    /// `apply` puts the summary header in front even when nothing matched.
    #[test]
    fn apply_always_prepends_header() {
        let rendered = apply("CPU: 15%");
        assert!(rendered.starts_with("[edge-redact:"), "header must be first");
    }
}