harn_vm/redact/
patterns.rs1use std::borrow::Cow;
30use std::cell::RefCell;
31use std::collections::BTreeMap;
32use std::sync::LazyLock;
33
34use regex::Regex;
35
36use crate::secret_patterns::DEFAULT_SECRET_PATTERN_SPECS;
37
/// Diagnostic code string associated with token redaction.
// NOTE(review): not referenced in the visible part of this file — confirm where it is emitted.
pub const TOKEN_REDACTION_DIAGNOSTIC: &str = "HARN-OAU-001";

/// Topic string for token-redaction audit records.
// NOTE(review): not referenced in the visible part of this file — confirm which bus/log uses it.
pub const TOKEN_REDACTION_AUDIT_TOPIC: &str = "audit.token_redaction";

/// Largest input, in bytes (256 KiB), that `scan_secret_patterns` will scan.
/// Longer inputs are returned unchanged, i.e. without any redaction.
const MAX_SCAN_INPUT_BYTES: usize = 256 * 1024;
52
/// A compiled regular expression paired with the name used to label its matches.
#[derive(Clone)]
pub struct NamedPattern {
    /// Label written into `<redacted:NAME:LEN>` placeholders and into
    /// `RedactionEvent::pattern_name`.
    pub name: &'static str,
    /// Compiled expression; every match it finds is replaced by the scanner.
    pub regex: Regex,
}
64
65impl std::fmt::Debug for NamedPattern {
66 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67 f.debug_struct("NamedPattern")
68 .field("name", &self.name)
69 .field("regex", &self.regex.as_str())
70 .finish()
71 }
72}
73
74pub static DEFAULT_PATTERNS: LazyLock<Vec<NamedPattern>> = LazyLock::new(|| {
78 DEFAULT_SECRET_PATTERN_SPECS
79 .iter()
80 .map(|spec| NamedPattern {
81 name: spec.redaction_name,
82 regex: Regex::new(spec.regex).unwrap_or_else(|error| {
83 panic!("invalid {} secret regex: {error}", spec.redaction_name)
84 }),
85 })
86 .collect()
87});
88
thread_local! {
    /// Patterns added on this thread through `register_custom_pattern`.
    /// `scan_secret_patterns` applies them after the default patterns.
    static CUSTOM_PATTERNS: RefCell<Vec<NamedPattern>> = const { RefCell::new(Vec::new()) };

    /// Optional callback that `emit_audit` invokes once per redaction event on this thread.
    static AUDIT_SINK: RefCell<Option<AuditSink>> = const { RefCell::new(None) };

    /// Recent redaction events recorded on this thread. `emit_audit` keeps at most
    /// 1024 entries and discards the oldest first.
    static AUDIT_RING: RefCell<Vec<RedactionEvent>> = const { RefCell::new(Vec::new()) };
}
111
/// Summary of what one named pattern redacted during a single scan.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RedactionEvent {
    /// Name of the pattern that matched.
    pub pattern_name: String,
    /// Number of matches that were replaced.
    pub match_count: usize,
    /// Sum of the byte lengths of the matched (replaced) spans.
    pub bytes_redacted: usize,
}
120
/// Callback invoked for each `RedactionEvent`. It is an `Rc`, so a sink is tied to the
/// thread that created it and is shared cheaply by cloning the handle.
pub type AuditSink = std::rc::Rc<dyn Fn(&RedactionEvent)>;
124
125pub fn register_custom_pattern(name: impl Into<String>, regex_source: &str) -> Result<(), String> {
130 let regex = Regex::new(regex_source).map_err(|error| format!("invalid regex: {error}"))?;
131 let name_static: &'static str = Box::leak(name.into().into_boxed_str());
137 CUSTOM_PATTERNS.with(|cell| {
138 cell.borrow_mut().push(NamedPattern {
139 name: name_static,
140 regex,
141 });
142 });
143 Ok(())
144}
145
146pub fn clear_custom_patterns() {
149 CUSTOM_PATTERNS.with(|cell| cell.borrow_mut().clear());
150}
151
152pub fn default_pattern_names() -> Vec<&'static str> {
154 DEFAULT_PATTERNS.iter().map(|p| p.name).collect()
155}
156
157pub fn custom_pattern_names() -> Vec<String> {
160 CUSTOM_PATTERNS.with(|cell| cell.borrow().iter().map(|p| p.name.to_string()).collect())
161}
162
163pub fn install_audit_sink(sink: Option<AuditSink>) -> Option<AuditSink> {
166 AUDIT_SINK.with(|cell| std::mem::replace(&mut *cell.borrow_mut(), sink))
167}
168
169fn emit_audit(events: &[RedactionEvent]) {
170 if events.is_empty() {
171 return;
172 }
173 AUDIT_RING.with(|ring| {
178 let mut ring = ring.borrow_mut();
179 for event in events {
180 if ring.len() >= 1024 {
185 ring.remove(0);
186 }
187 ring.push(event.clone());
188 }
189 });
190 let sink = AUDIT_SINK.with(|cell| cell.borrow().clone());
191 if let Some(sink) = sink {
192 for event in events {
193 sink(event);
194 }
195 }
196}
197
198pub fn drain_audit_ring() -> Vec<RedactionEvent> {
201 AUDIT_RING.with(|ring| std::mem::take(&mut *ring.borrow_mut()))
202}
203
204pub fn clear_audit_ring() {
208 AUDIT_RING.with(|ring| ring.borrow_mut().clear());
209}
210
/// Builds the named placeholder `<redacted:NAME:LEN>`, where `LEN` is the byte
/// length (not the character count) of the matched text.
fn replacement_for(name: &str, matched: &str) -> String {
    let byte_len = matched.len().to_string();
    ["<redacted:", name, ":", byte_len.as_str(), ">"].concat()
}
217
218pub fn scan_secret_patterns<'a>(input: &'a str, placeholder: &str) -> Cow<'a, str> {
230 if input.is_empty() {
231 return Cow::Borrowed(input);
232 }
233 if input.len() > MAX_SCAN_INPUT_BYTES {
238 return Cow::Borrowed(input);
239 }
240 let use_named_placeholder = placeholder == crate::redact::REDACTED_PLACEHOLDER;
241
242 let mut owned: Option<String> = None;
243 let mut audit_events: BTreeMap<&'static str, RedactionEvent> = BTreeMap::new();
244
245 let custom: Vec<NamedPattern> = CUSTOM_PATTERNS.with(|cell| cell.borrow().clone());
249 let all_patterns = DEFAULT_PATTERNS.iter().chain(custom.iter());
250
251 for pattern in all_patterns {
252 let target: &str = owned.as_deref().unwrap_or(input);
253 let matches: Vec<(usize, usize)> = pattern
254 .regex
255 .find_iter(target)
256 .map(|m| (m.start(), m.end()))
257 .collect();
258 if matches.is_empty() {
259 continue;
260 }
261 let total_bytes: usize = matches.iter().map(|(s, e)| e - s).sum();
262 audit_events.insert(
263 pattern.name,
264 RedactionEvent {
265 pattern_name: pattern.name.to_string(),
266 match_count: matches.len(),
267 bytes_redacted: total_bytes,
268 },
269 );
270
271 let mut buffer = target.to_string();
274 for (start, end) in matches.into_iter().rev() {
275 let matched_slice = &buffer[start..end];
276 let replacement = if use_named_placeholder {
277 replacement_for(pattern.name, matched_slice)
278 } else {
279 placeholder.to_string()
280 };
281 buffer.replace_range(start..end, &replacement);
282 }
283 owned = Some(buffer);
284 }
285
286 let result = match owned {
287 Some(value) if value == input => Cow::Borrowed(input),
288 Some(value) => Cow::Owned(value),
289 None => Cow::Borrowed(input),
290 };
291
292 if matches!(result, Cow::Owned(_)) {
293 let events: Vec<RedactionEvent> = audit_events.into_values().collect();
294 emit_audit(&events);
295 }
296
297 result
298}
299
#[cfg(test)]
mod tests {
    use super::*;

    /// Resets the thread-local state (custom patterns, sink, ring) so each test
    /// starts from a known baseline.
    fn run_clean() {
        clear_custom_patterns();
        install_audit_sink(None);
        clear_audit_ring();
    }

    /// Scans `input` using the named-placeholder mode.
    fn scan_named(input: &str) -> Cow<'_, str> {
        scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER)
    }

    #[test]
    fn returns_borrowed_when_clean() {
        run_clean();
        let scanned = scan_named("just plain text");
        assert!(matches!(scanned, Cow::Borrowed(_)));
    }

    #[test]
    fn replaces_aws_and_github_tokens_with_named_placeholder() {
        run_clean();
        let rendered =
            scan_named("AKIAABCDEFGHIJKLMNOP and ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
                .into_owned();
        assert!(rendered.contains("<redacted:aws_access_key:20>"));
        assert!(rendered.contains("<redacted:github_token:40>"));
        assert!(!rendered.contains("AKIAABCDEFGHIJKLMNOP"));
    }

    #[test]
    fn legacy_placeholder_path_still_works_for_url_param_values() {
        run_clean();
        let secret = "AKIAABCDEFGHIJKLMNOP";
        let scanned = scan_secret_patterns(secret, "%5Bredacted%5D");
        assert!(scanned.contains("%5Bredacted%5D"));
        assert!(!scanned.contains("AKIAABCDEFGHIJKLMNOP"));
    }

    #[test]
    fn replaces_bearer_token_inside_text() {
        run_clean();
        let scanned = scan_named("header: Authorization: Bearer abcDEFghi123_-+/=xyz tail");
        assert!(scanned.contains("<redacted:bearer_token:"));
        assert!(!scanned.contains("abcDEFghi123_-+/=xyz"));
        assert!(scanned.contains("tail"));
    }

    #[test]
    fn replaces_jwt_tokens() {
        run_clean();
        let scanned = scan_named("token=eyJabcd.eyJefgh.signature_pad here");
        assert!(scanned.contains("<redacted:jwt:"));
        assert!(!scanned.contains("eyJabcd.eyJefgh.signature_pad"));
    }

    #[test]
    fn replaces_private_key_blocks() {
        run_clean();
        let pem =
            "-----BEGIN OPENSSH PRIVATE KEY-----\nsecret-material\n-----END OPENSSH PRIVATE KEY-----";
        let scanned = scan_named(pem);
        assert!(scanned.contains("<redacted:private_key_block:"));
        assert!(!scanned.contains("secret-material"));
    }

    #[test]
    fn custom_pattern_redacts_and_is_introspectable() {
        run_clean();
        register_custom_pattern("acme_token", r"\bACME-[A-Z0-9]{8}\b").unwrap();
        assert_eq!(custom_pattern_names(), vec!["acme_token".to_string()]);

        let out = scan_named("header ACME-12345678 trailer");
        assert!(
            out.contains("<redacted:acme_token:13>"),
            "expected acme_token redaction, got: {out}"
        );

        clear_custom_patterns();
        assert!(custom_pattern_names().is_empty());
    }

    #[test]
    fn audit_sink_receives_one_event_per_matching_pattern() {
        use std::cell::RefCell;
        use std::rc::Rc;

        run_clean();
        let captured: Rc<RefCell<Vec<RedactionEvent>>> = Rc::new(RefCell::new(Vec::new()));
        let recorder = Rc::clone(&captured);
        install_audit_sink(Some(Rc::new(move |event| {
            recorder.borrow_mut().push(event.clone());
        })));

        let scanned = scan_named(
            "AKIAABCDEFGHIJKLMNOP AKIA0000000000000000 ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        );
        assert!(matches!(scanned, Cow::Owned(_)));

        {
            // Scope the borrow so it is released before the sink is uninstalled.
            let events = captured.borrow();
            assert_eq!(events.len(), 2);
            let mut by_name: BTreeMap<&str, &RedactionEvent> = BTreeMap::new();
            for event in events.iter() {
                by_name.insert(event.pattern_name.as_str(), event);
            }
            assert_eq!(by_name.get("aws_access_key").unwrap().match_count, 2);
            assert_eq!(by_name.get("github_token").unwrap().match_count, 1);
        }

        install_audit_sink(None);
        let ring = drain_audit_ring();
        assert_eq!(ring.len(), 2);
    }

    #[test]
    fn audit_ring_records_events_even_without_a_sink() {
        run_clean();
        let _ = scan_named("AKIAABCDEFGHIJKLMNOP");
        let drained = drain_audit_ring();
        assert_eq!(drained.len(), 1);
        assert_eq!(drained[0].pattern_name, "aws_access_key");
        // A second drain must find the ring already emptied.
        assert!(drain_audit_ring().is_empty());
    }

    #[test]
    fn input_above_cap_is_passthrough() {
        run_clean();
        let repeats = MAX_SCAN_INPUT_BYTES / 20 + 1;
        let huge = "AKIAABCDEFGHIJKLMNOP".repeat(repeats);
        let scanned = scan_named(&huge);
        assert!(matches!(scanned, Cow::Borrowed(_)));
    }

    #[test]
    fn default_pattern_names_are_stable() {
        let names = default_pattern_names();
        let required = [
            "jwt",
            "github_token",
            "github_pat_fine",
            "slack_token",
            "aws_access_key",
            "private_key_block",
            "bearer_token",
        ];
        for expected in required {
            assert!(names.contains(&expected));
        }
    }
}