use crate::error::{PiiError, PiiResult};
use crate::types::{AnonymizeResult, AnonymizedItem, Detection};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
/// Transformation applied to a detected span when text is anonymized.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum Operator {
/// Replaces the span with the literal `<REDACTED>`.
Redact,
/// Replaces every character of the span with `ch`, except the last
/// `from_end` characters, which are kept unchanged.
Mask { ch: char, from_end: usize },
/// Replaces the span with the string `with`.
Replace { with: String },
/// Replaces the span with the hex-encoded SHA-256 digest of `salt`
/// followed by the span's bytes.
HashSha256 { salt: String },
}
/// Selects which [`Operator`] is applied to each detection.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AnonymizeConfig {
/// Operator used when `per_entity` has no entry for a detection's entity type.
pub default: Operator,
/// Operator overrides, keyed by the string form of the detection's entity type.
pub per_entity: HashMap<String, Operator>,
}
impl Default for AnonymizeConfig {
    /// Builds a configuration that redacts every detection and carries no
    /// per-entity overrides.
    fn default() -> Self {
        let per_entity = HashMap::new();
        let default = Operator::Redact;
        Self { default, per_entity }
    }
}
/// Stateless namespace type whose associated functions rewrite detected spans of text.
pub struct Anonymizer;
impl Anonymizer {
    /// Replaces every detected span of `text` with the output of its operator.
    ///
    /// For each detection the operator is looked up in `config.per_entity` by
    /// the detection's entity type, falling back to `config.default`. The
    /// returned items are ordered by ascending start offset and carry the
    /// original detection together with the replacement text that was written.
    ///
    /// # Errors
    ///
    /// Returns `PiiError::Anonymizer` when a detection
    /// * has `start > end` or reaches past the end of `text`,
    /// * does not start and end on UTF-8 character boundaries, or
    /// * overlaps another detection.
    pub fn anonymize(
        text: &str,
        detections: &[Detection],
        config: &AnonymizeConfig,
    ) -> PiiResult<AnonymizeResult> {
        // Work front-to-back so the output can be assembled in a single pass
        // from untouched slices of `text`; offsets never need re-basing.
        let mut ordered = detections.to_vec();
        ordered.sort_by(|a, b| a.start.cmp(&b.start).then(a.end.cmp(&b.end)));

        let mut output = String::with_capacity(text.len());
        let mut items = Vec::with_capacity(ordered.len());
        // Byte offset in `text` up to which input has already been consumed.
        let mut cursor = 0;

        for detection in ordered {
            let start = detection.start;
            let end = detection.end;
            if start > end || end > text.len() {
                return Err(PiiError::Anonymizer("invalid offsets".to_string()));
            }
            // A span starting before the previous span ended would be applied
            // to text that has already been replaced.
            if start < cursor {
                return Err(PiiError::Anonymizer(
                    "overlapping detections".to_string(),
                ));
            }
            // `get` returns `None` instead of panicking when an offset falls
            // inside a multi-byte character.
            let span = text.get(start..end).ok_or_else(|| {
                PiiError::Anonymizer("offsets not on a char boundary".to_string())
            })?;
            let op = config
                .per_entity
                .get(&detection.entity_type.as_str())
                .unwrap_or(&config.default);
            let replacement = apply_operator(op, span);

            output.push_str(&text[cursor..start]);
            output.push_str(&replacement);
            cursor = end;

            items.push(AnonymizedItem {
                entity: detection,
                replacement,
            });
        }

        // Copy whatever follows the last detection.
        output.push_str(&text[cursor..]);
        Ok(AnonymizeResult { text: output, items })
    }
}
/// Produces the replacement text for `span` under the operator `op`.
fn apply_operator(op: &Operator, span: &str) -> String {
    match op {
        Operator::HashSha256 { salt } => hash(span, salt),
        Operator::Mask { ch, from_end } => {
            let (fill, visible) = (*ch, *from_end);
            mask(span, fill, visible)
        }
        Operator::Replace { with } => with.to_owned(),
        Operator::Redact => String::from("<REDACTED>"),
    }
}
/// Masks `value` with `ch`, leaving its last `from_end` characters visible.
///
/// Lengths are counted in `char`s, not bytes. When `from_end` is at least the
/// character count of `value`, the value is returned unchanged; an empty
/// `value` yields an empty string.
fn mask(value: &str, ch: char, from_end: usize) -> String {
    let total = value.chars().count();
    let visible = from_end.min(total);
    let hidden = total - visible;

    // Emit the filler for the hidden prefix, then the untouched tail.
    std::iter::repeat(ch)
        .take(hidden)
        .chain(value.chars().skip(hidden))
        .collect()
}
/// Returns the hex-encoded SHA-256 digest of `salt` followed by `value`.
fn hash(value: &str, salt: &str) -> String {
    let mut sha = Sha256::new();
    // The salt is fed first, then the value, as one continuous byte stream.
    for part in [salt.as_bytes(), value.as_bytes()] {
        sha.update(part);
    }
    hex::encode(sha.finalize())
}