harn_vm/redact/
patterns.rs1use std::borrow::Cow;
30use std::cell::RefCell;
31use std::collections::BTreeMap;
32use std::sync::LazyLock;
33
34use regex::Regex;
35
36use crate::secret_patterns::DEFAULT_SECRET_PATTERN_SPECS;
37
/// Diagnostic code string associated with token redaction.
// NOTE(review): not referenced elsewhere in this file; presumably consumed by callers — confirm.
pub const TOKEN_REDACTION_DIAGNOSTIC: &str = "HARN-OAU-001";

/// Topic name string for token-redaction audit events.
// NOTE(review): not referenced elsewhere in this file; presumably consumed by callers — confirm.
pub const TOKEN_REDACTION_AUDIT_TOPIC: &str = "audit.token_redaction";

/// Upper bound, in bytes, on input that `scan_secret_patterns` will scan (256 KiB).
/// Longer input is returned unchanged without being scanned.
const MAX_SCAN_INPUT_BYTES: usize = 256 * 1024;
52
/// A compiled secret-detection regex paired with the name used to label its matches.
#[derive(Clone)]
pub struct NamedPattern {
    /// Label for this pattern; it appears in `<redacted:NAME:LEN>` placeholders and in
    /// the `pattern_name` of audit events.
    pub name: &'static str,
    /// Compiled regex whose matches are replaced during a scan.
    pub regex: Regex,
}
64
65impl std::fmt::Debug for NamedPattern {
66 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67 f.debug_struct("NamedPattern")
68 .field("name", &self.name)
69 .field("regex", &self.regex.as_str())
70 .finish()
71 }
72}
73
74pub static DEFAULT_PATTERNS: LazyLock<Vec<NamedPattern>> = LazyLock::new(|| {
78 DEFAULT_SECRET_PATTERN_SPECS
79 .iter()
80 .map(|spec| NamedPattern {
81 name: spec.redaction_name,
82 regex: Regex::new(spec.regex).unwrap_or_else(|error| {
83 panic!("invalid {} secret regex: {error}", spec.redaction_name)
84 }),
85 })
86 .collect()
87});
88
thread_local! {
    /// Patterns added on this thread via `register_custom_pattern`; scanned after the
    /// defaults by `scan_secret_patterns`.
    static CUSTOM_PATTERNS: RefCell<Vec<NamedPattern>> = const { RefCell::new(Vec::new()) };

    /// Optional callback that `emit_audit` invokes for each redaction event on this thread.
    static AUDIT_SINK: RefCell<Option<AuditSink>> = const { RefCell::new(None) };

    /// Buffer of recent redaction events on this thread; `emit_audit` appends to it and
    /// `drain_audit_ring` empties it.
    static AUDIT_RING: RefCell<Vec<RedactionEvent>> = const { RefCell::new(Vec::new()) };
}
111
/// Summary of what one named pattern redacted during a single scan.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RedactionEvent {
    /// Name of the pattern that matched.
    pub pattern_name: String,
    /// Number of matches that pattern produced in the scanned text.
    pub match_count: usize,
    /// Total byte length of the matched text that was replaced.
    pub bytes_redacted: usize,
}
120
/// Shared callback that receives each [`RedactionEvent`]; reference-counted with `Rc`.
pub type AuditSink = std::rc::Rc<dyn Fn(&RedactionEvent)>;
124
125pub fn register_custom_pattern(name: impl Into<String>, regex_source: &str) -> Result<(), String> {
130 let regex = Regex::new(regex_source).map_err(|error| format!("invalid regex: {error}"))?;
131 let name_static: &'static str = Box::leak(name.into().into_boxed_str());
137 CUSTOM_PATTERNS.with(|cell| {
138 cell.borrow_mut().push(NamedPattern {
139 name: name_static,
140 regex,
141 });
142 });
143 Ok(())
144}
145
146pub fn clear_custom_patterns() {
149 CUSTOM_PATTERNS.with(|cell| cell.borrow_mut().clear());
150}
151
152pub fn default_pattern_names() -> Vec<&'static str> {
154 DEFAULT_PATTERNS.iter().map(|p| p.name).collect()
155}
156
157pub fn custom_pattern_names() -> Vec<String> {
160 CUSTOM_PATTERNS.with(|cell| cell.borrow().iter().map(|p| p.name.to_string()).collect())
161}
162
163pub fn install_audit_sink(sink: Option<AuditSink>) -> Option<AuditSink> {
166 AUDIT_SINK.with(|cell| std::mem::replace(&mut *cell.borrow_mut(), sink))
167}
168
169fn emit_audit(events: &[RedactionEvent]) {
170 if events.is_empty() {
171 return;
172 }
173 AUDIT_RING.with(|ring| {
178 let mut ring = ring.borrow_mut();
179 for event in events {
180 if ring.len() >= 1024 {
185 ring.remove(0);
186 }
187 ring.push(event.clone());
188 }
189 });
190 let sink = AUDIT_SINK.with(|cell| cell.borrow().clone());
191 if let Some(sink) = sink {
192 for event in events {
193 sink(event);
194 }
195 }
196}
197
198pub fn drain_audit_ring() -> Vec<RedactionEvent> {
201 AUDIT_RING.with(|ring| std::mem::take(&mut *ring.borrow_mut()))
202}
203
204pub fn clear_audit_ring() {
208 AUDIT_RING.with(|ring| ring.borrow_mut().clear());
209}
210
/// Builds the `<redacted:NAME:LEN>` placeholder for one match, where `LEN` is the byte
/// length of the matched text.
fn replacement_for(name: &str, matched: &str) -> String {
    let byte_len = matched.len().to_string();
    let mut token = String::from("<redacted:");
    token.push_str(name);
    token.push(':');
    token.push_str(&byte_len);
    token.push('>');
    token
}
217
218pub fn scan_secret_patterns<'a>(input: &'a str, placeholder: &str) -> Cow<'a, str> {
230 if input.is_empty() {
231 return Cow::Borrowed(input);
232 }
233 if input.len() > MAX_SCAN_INPUT_BYTES {
238 return Cow::Borrowed(input);
239 }
240 let use_named_placeholder = placeholder == crate::redact::REDACTED_PLACEHOLDER;
241
242 let mut owned: Option<String> = None;
243 let mut audit_events: BTreeMap<&'static str, RedactionEvent> = BTreeMap::new();
244
245 let custom: Vec<NamedPattern> = CUSTOM_PATTERNS.with(|cell| cell.borrow().clone());
249 let all_patterns = DEFAULT_PATTERNS.iter().chain(custom.iter());
250
251 for pattern in all_patterns {
252 let target: &str = owned.as_deref().unwrap_or(input);
253 let matches: Vec<(usize, usize)> = pattern
254 .regex
255 .find_iter(target)
256 .map(|m| (m.start(), m.end()))
257 .collect();
258 if matches.is_empty() {
259 continue;
260 }
261 let total_bytes: usize = matches.iter().map(|(s, e)| e - s).sum();
262 audit_events.insert(
263 pattern.name,
264 RedactionEvent {
265 pattern_name: pattern.name.to_string(),
266 match_count: matches.len(),
267 bytes_redacted: total_bytes,
268 },
269 );
270
271 let mut buffer = target.to_string();
274 for (start, end) in matches.into_iter().rev() {
275 let matched_slice = &buffer[start..end];
276 let replacement = if use_named_placeholder {
277 replacement_for(pattern.name, matched_slice)
278 } else {
279 placeholder.to_string()
280 };
281 buffer.replace_range(start..end, &replacement);
282 }
283 owned = Some(buffer);
284 }
285
286 let result = match owned {
287 Some(value) if value == input => Cow::Borrowed(input),
288 Some(value) => Cow::Owned(value),
289 None => Cow::Borrowed(input),
290 };
291
292 if matches!(result, Cow::Owned(_)) {
293 let events: Vec<RedactionEvent> = audit_events.into_values().collect();
294 emit_audit(&events);
295 }
296
297 result
298}
299
// Unit tests for pattern scanning, custom-pattern registration, and audit reporting.
// Each test resets the thread-local state it depends on via `run_clean`.
#[cfg(test)]
mod tests {
    use super::*;

    // Resets custom patterns, the audit sink, and the audit ring for the current thread.
    fn run_clean() {
        clear_custom_patterns();
        install_audit_sink(None);
        clear_audit_ring();
    }

    // Text with no secrets must come back borrowed (no allocation).
    #[test]
    fn returns_borrowed_when_clean() {
        run_clean();
        let out = scan_secret_patterns("just plain text", crate::redact::REDACTED_PLACEHOLDER);
        assert!(matches!(out, Cow::Borrowed(_)));
    }

    // With the default placeholder, matches become `<redacted:NAME:LEN>` tokens.
    #[test]
    fn replaces_aws_and_github_tokens_with_named_placeholder() {
        run_clean();
        let input = "AKIAABCDEFGHIJKLMNOP and ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
        let rendered = out.into_owned();
        assert!(rendered.contains("<redacted:aws_access_key:20>"));
        assert!(rendered.contains("<redacted:github_token:40>"));
        assert!(!rendered.contains("AKIAABCDEFGHIJKLMNOP"));
    }

    // A non-default placeholder is inserted verbatim instead of the named token.
    #[test]
    fn legacy_placeholder_path_still_works_for_url_param_values() {
        run_clean();
        let input = "AKIAABCDEFGHIJKLMNOP";
        let out = scan_secret_patterns(input, "%5Bredacted%5D");
        assert!(out.contains("%5Bredacted%5D"));
        assert!(!out.contains("AKIAABCDEFGHIJKLMNOP"));
    }

    // Bearer tokens are redacted while the surrounding text is kept.
    #[test]
    fn replaces_bearer_token_inside_text() {
        run_clean();
        let input = "header: Authorization: Bearer abcDEFghi123_-+/=xyz tail";
        let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
        assert!(out.contains("<redacted:bearer_token:"));
        assert!(!out.contains("abcDEFghi123_-+/=xyz"));
        assert!(out.contains("tail"));
    }

    // `token=...` is redacted, but the look-alike `max_tokens=200` is left alone.
    #[test]
    fn replaces_sensitive_assignments_inside_text() {
        run_clean();
        let input = "retry with token=abc123 and max_tokens=200";
        let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
        assert!(out.contains("<redacted:sensitive_assignment:"));
        assert!(!out.contains("token=abc123"));
        assert!(out.contains("max_tokens=200"));
    }

    // Source-code declarations that merely use the words Token/Secret must not match.
    #[test]
    fn sensitive_assignment_preserves_source_declarations() {
        run_clean();
        let input = "pub const Token = struct { kind: u8 };\nconst Secret = enum { a, b };";
        let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
        assert!(matches!(out, Cow::Borrowed(_)));
    }

    // Even an obviously fake value such as `secret` is redacted in an assignment.
    #[test]
    fn sensitive_assignment_redacts_placeholder_secret_words() {
        run_clean();
        let input = "Checkout incident needed the same query token=secret";
        let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
        assert!(out.contains("<redacted:sensitive_assignment:"));
        assert!(!out.contains("token=secret"));
    }

    // A JWT-shaped value is redacted under the `jwt` name.
    #[test]
    fn replaces_jwt_tokens() {
        run_clean();
        let input = "token=eyJabcd.eyJefgh.signature_pad here";
        let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
        assert!(out.contains("<redacted:jwt:"));
        assert!(!out.contains("eyJabcd.eyJefgh.signature_pad"));
    }

    // A multi-line PEM private key block is redacted, including its body.
    #[test]
    fn replaces_private_key_blocks() {
        run_clean();
        let input =
            "-----BEGIN OPENSSH PRIVATE KEY-----\nsecret-material\n-----END OPENSSH PRIVATE KEY-----";
        let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
        assert!(out.contains("<redacted:private_key_block:"));
        assert!(!out.contains("secret-material"));
    }

    // Synthetic keys for several AI providers are each redacted under their own name.
    #[test]
    fn replaces_ai_provider_tokens() {
        run_clean();
        let huggingface = format!("hf_{}", "a".repeat(24));
        let cerebras = format!("csk-{}", "b".repeat(48));
        let together = format!("tgp_v1_{}", "c".repeat(32));
        let google = format!("AIza{}", "D".repeat(35));
        let input = format!("{huggingface} {cerebras} {together} {google}");

        let out = scan_secret_patterns(&input, crate::redact::REDACTED_PLACEHOLDER);
        let rendered = out.into_owned();

        assert!(rendered.contains("<redacted:huggingface_token:"));
        assert!(rendered.contains("<redacted:cerebras_key:"));
        assert!(rendered.contains("<redacted:together_key:"));
        assert!(rendered.contains("<redacted:google_api_key:"));
        assert!(!rendered.contains(&huggingface));
        assert!(!rendered.contains(&cerebras));
        assert!(!rendered.contains(&together));
        assert!(!rendered.contains(&google));
    }

    // A registered custom pattern is listed, applied during scans, and removable.
    #[test]
    fn custom_pattern_redacts_and_is_introspectable() {
        run_clean();
        register_custom_pattern("acme_token", r"\bACME-[A-Z0-9]{8}\b").unwrap();
        assert_eq!(custom_pattern_names(), vec!["acme_token".to_string()]);
        let out = scan_secret_patterns(
            "header ACME-12345678 trailer",
            crate::redact::REDACTED_PLACEHOLDER,
        );
        assert!(
            out.contains("<redacted:acme_token:13>"),
            "expected acme_token redaction, got: {out}"
        );
        clear_custom_patterns();
        assert!(custom_pattern_names().is_empty());
    }

    // The sink sees one event per pattern name, with matches aggregated per pattern,
    // and the same events are also buffered in the ring.
    #[test]
    fn audit_sink_receives_one_event_per_matching_pattern() {
        use std::cell::RefCell;
        use std::rc::Rc;
        run_clean();
        let captured: Rc<RefCell<Vec<RedactionEvent>>> = Rc::new(RefCell::new(Vec::new()));
        let sink_captured = captured.clone();
        install_audit_sink(Some(Rc::new(move |event| {
            sink_captured.borrow_mut().push(event.clone());
        })));
        // Two AWS keys and one GitHub token: two pattern names, three matches.
        let input =
            "AKIAABCDEFGHIJKLMNOP AKIA0000000000000000 ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
        assert!(matches!(out, Cow::Owned(_)));
        let events = captured.borrow();
        assert_eq!(events.len(), 2);
        let by_name: BTreeMap<&str, &RedactionEvent> = events
            .iter()
            .map(|event| (event.pattern_name.as_str(), event))
            .collect();
        assert_eq!(by_name.get("aws_access_key").unwrap().match_count, 2);
        assert_eq!(by_name.get("github_token").unwrap().match_count, 1);
        // Release the borrow on `captured` before uninstalling the sink.
        drop(events);
        install_audit_sink(None);
        let ring = drain_audit_ring();
        assert_eq!(ring.len(), 2);
    }

    // Events are buffered in the ring with no sink installed, and draining empties it.
    #[test]
    fn audit_ring_records_events_even_without_a_sink() {
        run_clean();
        let _ = scan_secret_patterns("AKIAABCDEFGHIJKLMNOP", crate::redact::REDACTED_PLACEHOLDER);
        let ring = drain_audit_ring();
        assert_eq!(ring.len(), 1);
        assert_eq!(ring[0].pattern_name, "aws_access_key");
        assert!(drain_audit_ring().is_empty());
    }

    // Input longer than `MAX_SCAN_INPUT_BYTES` is returned unscanned, secrets included.
    #[test]
    fn input_above_cap_is_passthrough() {
        run_clean();
        let huge = "AKIAABCDEFGHIJKLMNOP".repeat(MAX_SCAN_INPUT_BYTES / 20 + 1);
        let out = scan_secret_patterns(&huge, crate::redact::REDACTED_PLACEHOLDER);
        assert!(matches!(out, Cow::Borrowed(_)));
    }

    // Guards against built-in pattern names being renamed or removed.
    #[test]
    fn default_pattern_names_are_stable() {
        let names = default_pattern_names();
        assert!(names.contains(&"jwt"));
        assert!(names.contains(&"github_token"));
        assert!(names.contains(&"github_pat_fine"));
        assert!(names.contains(&"slack_token"));
        assert!(names.contains(&"aws_access_key"));
        assert!(names.contains(&"huggingface_token"));
        assert!(names.contains(&"cerebras_key"));
        assert!(names.contains(&"together_key"));
        assert!(names.contains(&"google_api_key"));
        assert!(names.contains(&"private_key_block"));
        assert!(names.contains(&"bearer_token"));
        assert!(names.contains(&"sensitive_assignment"));
    }
}