hematite/agent/
edge_redact.rs1use lazy_static::lazy_static;
19use regex::Regex;
20use std::collections::BTreeMap;
21use std::fmt::Write as _;
22
/// Outcome of a redaction pass over a piece of text.
///
/// Produced by `redact`; `apply` consumes it to build the final payload.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RedactResult {
    /// The input text with every detected sensitive value replaced by a placeholder.
    pub text: String,
    /// Total number of substitutions, i.e. the sum of all values in `tier1_hits`.
    pub redaction_count: usize,
    /// One-line, human-readable `[edge-redact: ...]` summary of what was replaced.
    pub summary_header: String,
    /// Substitution count per pattern label (for example `"mac-address"`), sorted by label.
    pub tier1_hits: BTreeMap<&'static str, usize>,
}
33
34struct Pattern {
35 re: Regex,
36 label: &'static str,
37 replacement: &'static str,
38}
39
40lazy_static! {
41 static ref PATTERNS: Vec<Pattern> = vec![
42 Pattern {
45 re: Regex::new(r"(?i)(C:[/\\]Users[/\\])([^/\\\r\n\t ]+)([/\\]?)").unwrap(),
46 label: "username-path",
47 replacement: "${1}[USER]${3}",
48 },
49 Pattern {
52 re: Regex::new(r"(/(?:home|Users)/)([^/\r\n\t ]+)(/?)").unwrap(),
53 label: "username-path",
54 replacement: "${1}[USER]${3}",
55 },
56 Pattern {
58 re: Regex::new(r"\b([0-9A-Fa-f]{2}[:\-]){5}[0-9A-Fa-f]{2}\b").unwrap(),
59 label: "mac-address",
60 replacement: "[MAC]",
61 },
62 Pattern {
64 re: Regex::new(r"(?i)(serial\s*(?:number)?[:=]\s*)([^\s\r\n]{4,})").unwrap(),
65 label: "serial-number",
66 replacement: "${1}[SERIAL]",
67 },
68 Pattern {
70 re: Regex::new(
71 r"(?i)((?:hostname|computer\s*name|machine\s*name|device\s*name|netbios\s*name)\s*[:=]\s*)([^\s\r\n]+)"
72 ).unwrap(),
73 label: "hostname",
74 replacement: "${1}[HOSTNAME]",
75 },
76 Pattern {
78 re: Regex::new(r"\bAKIA[0-9A-Z]{16}\b").unwrap(),
79 label: "aws-key",
80 replacement: "[AWS-KEY]",
81 },
82 Pattern {
85 re: Regex::new(
86 r"(?i)((?:api[_\-]?key|secret[_\-]?key|access[_\-]?token|auth[_\-]?token|password|passwd|pwd|private[_\-]?key|client[_\-]?secret)[^\s=:]*\s*[:=]\s*)(\S{8,})"
87 ).unwrap(),
88 label: "credential",
89 replacement: "${1}[REDACTED]",
90 },
91 ];
92}
93
94pub fn redact(input: &str) -> RedactResult {
97 let mut text = input.to_string();
98 let mut counts: BTreeMap<&'static str, usize> = BTreeMap::new();
99
100 for pattern in PATTERNS.iter() {
101 let hits = pattern.re.find_iter(&text).count();
102 if hits > 0 {
103 *counts.entry(pattern.label).or_insert(0) += hits;
104 text = pattern
105 .re
106 .replace_all(&text, pattern.replacement)
107 .into_owned();
108 }
109 }
110
111 let total: usize = counts.values().sum();
112
113 let summary_header = if total == 0 {
114 String::from("[edge-redact: no sensitive patterns detected]")
115 } else {
116 let mut detail = String::with_capacity(counts.len() * 20);
117 for (i, (label, n)) in counts.iter().enumerate() {
118 if i > 0 {
119 detail.push_str(", ");
120 }
121 detail.push_str(label);
122 let _ = write!(detail, " \u{00d7}{n}");
123 }
124 format!(
125 "[edge-redact: {total} substitution(s) — {detail} — values replaced before leaving this machine]"
126 )
127 };
128
129 RedactResult {
130 text,
131 redaction_count: total,
132 summary_header,
133 tier1_hits: counts,
134 }
135}
136
137pub fn apply(raw: &str) -> String {
140 let result = redact(raw);
141 format!("{}\n\n{}", result.summary_header, result.text)
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `redact` and hands back only the scrubbed text.
    fn scrub(sample: &str) -> String {
        redact(sample).text
    }

    #[test]
    fn redacts_windows_username_path() {
        let outcome = redact("path: C:\\Users\\johndoe\\Documents\\project");
        assert!(outcome.text.contains("[USER]"), "should redact username");
        assert!(
            !outcome.text.contains("johndoe"),
            "should not contain raw username"
        );
        assert!(outcome.redaction_count > 0);
    }

    #[test]
    fn redacts_mac_address() {
        let cleaned = scrub("MAC: 00:1A:2B:3C:4D:5E adapter connected");
        assert!(cleaned.contains("[MAC]"), "should redact MAC");
        assert!(
            !cleaned.contains("00:1A:2B:3C:4D:5E"),
            "raw MAC must not appear"
        );
    }

    #[test]
    fn redacts_serial_number() {
        let cleaned = scrub("SerialNumber: WD-WX12345678");
        assert!(cleaned.contains("[SERIAL]"), "should redact serial");
        assert!(
            !cleaned.contains("WD-WX12345678"),
            "raw serial must not appear"
        );
    }

    #[test]
    fn redacts_hostname_label() {
        let cleaned = scrub("ComputerName: CORP-LAPTOP-007");
        assert!(cleaned.contains("[HOSTNAME]"), "should redact hostname");
        assert!(
            !cleaned.contains("CORP-LAPTOP-007"),
            "raw hostname must not appear"
        );
    }

    #[test]
    fn redacts_aws_key() {
        let cleaned = scrub("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE found in env");
        assert!(cleaned.contains("[AWS-KEY]"), "should redact AWS key");
        assert!(
            !cleaned.contains("AKIAIOSFODNN7EXAMPLE"),
            "raw key must not appear"
        );
    }

    #[test]
    fn redacts_credential_value() {
        let cleaned = scrub("API_KEY=sk-supersecretvalue123 exported");
        assert!(
            cleaned.contains("[REDACTED]"),
            "should redact credential value"
        );
        assert!(
            !cleaned.contains("sk-supersecretvalue123"),
            "raw secret must not appear"
        );
    }

    #[test]
    fn clean_input_passes_through_unchanged() {
        let sample = "Processes: 42 running\nCPU: 12%\nRAM: 8.1 GB / 32 GB";
        let outcome = redact(sample);
        assert_eq!(outcome.redaction_count, 0);
        assert_eq!(outcome.text, sample);
        assert!(outcome.summary_header.contains("no sensitive patterns"));
    }

    #[test]
    fn apply_always_prepends_header() {
        let payload = apply("CPU: 15%");
        assert!(payload.starts_with("[edge-redact:"), "header must be first");
    }
}