use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{OnceLock, RwLock};
/// How the pattern of a [`KeywordEntry`] is compared against text.
///
/// Variants are (de)serialized in lowercase, i.e. `"exact"` and `"regex"`.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MatchType {
    /// The pattern is matched as a literal substring. This is the default.
    #[default]
    Exact,
    /// The pattern is compiled and matched as a regular expression.
    Regex,
}
/// A single masking rule: a pattern, how it is matched, and whether it is active.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeywordEntry {
/// Literal text or regular-expression source, depending on `match_type`.
pub pattern: String,
/// How `pattern` is interpreted; falls back to `MatchType::default()` when
/// the field is absent during deserialization.
#[serde(default)]
pub match_type: MatchType,
/// Whether the entry is applied when masking; deserializes to `true` when
/// the field is absent (see `default_true`).
#[serde(default = "default_true")]
pub enabled: bool,
}
/// Serde default provider that always yields `true`.
///
/// Referenced by `#[serde(default = "default_true")]` so that a missing
/// boolean field deserializes as enabled rather than as `bool::default()`.
fn default_true() -> bool {
    true
}
impl KeywordEntry {
pub fn exact(pattern: impl Into<String>) -> Self {
Self {
pattern: pattern.into(),
match_type: MatchType::Exact,
enabled: true,
}
}
pub fn regex(pattern: impl Into<String>) -> Self {
Self {
pattern: pattern.into(),
match_type: MatchType::Regex,
enabled: true,
}
}
pub fn validate(&self) -> Result<(), String> {
if self.match_type == MatchType::Regex {
regex::Regex::new(&self.pattern)
.map_err(|e| format!("Invalid regex pattern '{}': {}", self.pattern, e))?;
}
Ok(())
}
}
/// An ordered list of keyword-masking rules.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct KeywordMaskingConfig {
/// Masking rules, kept in insertion order; deserializes to an empty list
/// when the field is absent.
#[serde(default)]
pub entries: Vec<KeywordEntry>,
}
/// Lazily-initialized, process-wide cache of compiled regexes keyed by their
/// pattern source. The value type is `Option<Regex>` so that an entry can be
/// stored for a pattern even when no compiled regex is available for it.
static REGEX_CACHE: OnceLock<RwLock<HashMap<String, Option<Regex>>>> = OnceLock::new();

/// Returns the shared regex cache, creating an empty one on first use.
fn regex_cache() -> &'static RwLock<HashMap<String, Option<Regex>>> {
    REGEX_CACHE.get_or_init(RwLock::default)
}
impl KeywordMaskingConfig {
pub fn new() -> Self {
Self::default()
}
pub fn add_entry(&mut self, entry: KeywordEntry) {
self.entries.push(entry);
}
pub fn validate(&self) -> Result<(), Vec<(usize, String)>> {
let mut errors = Vec::new();
for (idx, entry) in self.entries.iter().enumerate() {
if let Err(e) = entry.validate() {
errors.push((idx, e));
}
}
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
pub fn apply_masking(&self, text: &str) -> String {
let mut result = text.to_string();
if self.entries.is_empty() {
return result;
}
let cache = regex_cache();
{
let missing: Vec<&str> = {
let read = cache.read().unwrap_or_else(|e| e.into_inner());
self.entries
.iter()
.filter(|entry| {
entry.enabled
&& entry.match_type == MatchType::Regex
&& !read.contains_key(&entry.pattern)
})
.map(|entry| entry.pattern.as_str())
.collect()
};
if !missing.is_empty() {
let mut write = cache.write().unwrap_or_else(|e| e.into_inner());
for pattern in missing {
write
.entry(pattern.to_string())
.or_insert_with(|| Regex::new(pattern).ok());
}
}
}
let read = cache.read().unwrap_or_else(|e| e.into_inner());
for entry in &self.entries {
if !entry.enabled {
continue;
}
match entry.match_type {
MatchType::Exact => {
result = result.replace(&entry.pattern, "[MASKED]");
}
MatchType::Regex => {
if let Some(regex) = read.get(&entry.pattern).and_then(|opt| opt.as_ref()) {
result = regex.replace_all(&result, "[MASKED]").to_string();
}
}
}
}
result
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a config from `entries` and masks `text` with it.
    fn mask(entries: Vec<KeywordEntry>, text: &str) -> String {
        KeywordMaskingConfig { entries }.apply_masking(text)
    }

    #[test]
    fn test_exact_masking() {
        let masked = mask(
            vec![KeywordEntry::exact("secret-token")],
            "This has secret-token in it",
        );
        assert_eq!(masked, "This has [MASKED] in it");
    }

    #[test]
    fn test_regex_masking() {
        let masked = mask(
            vec![KeywordEntry::regex(r"sk-[A-Za-z0-9]+")],
            "API key: sk-abc123xyz",
        );
        assert_eq!(masked, "API key: [MASKED]");
    }

    #[test]
    fn test_disabled_entry_not_applied() {
        let disabled = KeywordEntry {
            pattern: "secret".to_string(),
            match_type: MatchType::Exact,
            enabled: false,
        };
        let masked = mask(vec![disabled], "This has secret in it");
        assert_eq!(masked, "This has secret in it");
    }

    #[test]
    fn test_multiple_entries() {
        let masked = mask(
            vec![KeywordEntry::exact("foo"), KeywordEntry::exact("bar")],
            "foo and bar",
        );
        assert_eq!(masked, "[MASKED] and [MASKED]");
    }

    #[test]
    fn test_validate_regex() {
        // Unterminated character class must be rejected.
        let broken = KeywordEntry::regex(r"[a-z+");
        assert!(broken.validate().is_err());

        let well_formed = KeywordEntry::regex(r"[a-z]+");
        assert!(well_formed.validate().is_ok());
    }

    #[test]
    fn test_validate_config() {
        let config = KeywordMaskingConfig {
            entries: vec![
                KeywordEntry::regex(r"[a-z+"),
                KeywordEntry::regex(r"[a-z]+"),
            ],
        };

        let outcome = config.validate();
        assert!(outcome.is_err());

        // Only the first entry is invalid, and it is reported by index.
        let errors = outcome.unwrap_err();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].0, 0);
    }

    #[test]
    fn test_invalid_regex_pattern_skipped_but_valid_entries_still_mask() {
        let masked = mask(
            vec![
                KeywordEntry::exact("literal-secret"),
                KeywordEntry::regex(r"[a-z+"),
                KeywordEntry::regex(r"sk-[A-Za-z0-9]+"),
            ],
            "literal-secret and sk-abc123 plus [a-z+ garbage and more text",
        );
        assert_eq!(
            masked,
            "[MASKED] and [MASKED] plus [a-z+ garbage and more text"
        );
    }

    #[test]
    fn test_apply_masking_is_stable_across_repeated_calls() {
        let config = KeywordMaskingConfig {
            entries: vec![
                KeywordEntry::regex(r"\d{3}-\d{4}"),
                KeywordEntry::exact("secret"),
            ],
        };
        let input = "call secret at 555-1234 or 999-0000";

        // The same config is reused so later calls hit the regex cache.
        let first = config.apply_masking(input);
        let second = config.apply_masking(input);
        let third = config.apply_masking(&format!("again {input}"));

        assert_eq!(
            first, second,
            "repeated calls must produce identical output"
        );
        assert_eq!(
            first, "call [MASKED] at [MASKED] or [MASKED]",
            "sanity-check expected masking"
        );
        assert_eq!(third, "again call [MASKED] at [MASKED] or [MASKED]");
    }
}