Skip to main content

cloakrs_core/
masker.rs

1//! Masking strategies for detected PII.
2
3use crate::{CloakError, EntityType, PiiEntity, Result};
4use aes_gcm::aead::Aead;
5use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
6use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
7use base64::Engine;
8use serde::{Deserialize, Serialize};
9use sha2::{Digest, Sha256};
10
11/// How detected PII should be masked.
12///
13/// # Examples
14///
15/// ```
16/// use cloakrs_core::MaskStrategy;
17///
18/// let strategy = MaskStrategy::PartialMask {
19///     reveal_prefix: 1,
20///     reveal_suffix: 4,
21///     mask_char: '*',
22/// };
23/// assert!(matches!(strategy, MaskStrategy::PartialMask { .. }));
24/// ```
25#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
26pub enum MaskStrategy {
27    /// Replace with a typed placeholder such as `[EMAIL]` or `[SSN]`.
28    #[default]
29    Redact,
30    /// Preserve selected prefix and suffix characters while masking the middle.
31    PartialMask {
32        /// Number of characters to reveal at the start.
33        reveal_prefix: usize,
34        /// Number of characters to reveal at the end.
35        reveal_suffix: usize,
36        /// Character used for masked positions.
37        mask_char: char,
38    },
39    /// Deterministic SHA-256 hash.
40    Hash {
41        /// Optional salt.
42        salt: Option<String>,
43    },
44    /// Replace with deterministic fake-but-safe data.
45    Replace,
46    /// AES-256-GCM encryption.
47    Encrypt {
48        /// Encryption key.
49        key: String,
50    },
51    /// Replace every finding with this exact string.
52    Custom(String),
53}
54
55impl MaskStrategy {
56    /// Returns the replacement text for a finding.
57    #[must_use]
58    pub fn replacement(&self, finding: &PiiEntity) -> String {
59        match self.try_replacement(finding) {
60            Ok(replacement) => replacement,
61            Err(_) => finding.entity_type.redaction_tag(),
62        }
63    }
64
65    /// Returns the replacement text for a finding, propagating fallible strategies.
66    pub fn try_replacement(&self, finding: &PiiEntity) -> Result<String> {
67        match self {
68            Self::Redact => Ok(finding.entity_type.redaction_tag()),
69            Self::PartialMask {
70                reveal_prefix,
71                reveal_suffix,
72                mask_char,
73            } => Ok(partial_mask(
74                finding,
75                *reveal_prefix,
76                *reveal_suffix,
77                *mask_char,
78            )),
79            Self::Hash { salt } => {
80                if finding.entity_type == EntityType::UserPath {
81                    Ok(hash_user_path(&finding.text, salt.as_deref()))
82                } else {
83                    Ok(hash_mask(finding, salt.as_deref(), DEFAULT_HASH_LENGTH))
84                }
85            }
86            Self::Replace => Ok(replace_mask(finding)),
87            Self::Encrypt { key } => encrypt_mask(finding, key),
88            Self::Custom(replacement) => Ok(replacement.clone()),
89        }
90    }
91}
92
/// Number of hex characters of the digest kept by the `Hash` strategy.
const DEFAULT_HASH_LENGTH: usize = 16;
/// Lower bound a requested hash length is clamped to.
const MIN_HASH_LENGTH: usize = 8;
/// Upper bound a requested hash length is clamped to.
const MAX_HASH_LENGTH: usize = 64;
/// Size in bytes of the nonce stored at the front of every encrypted payload.
const NONCE_LENGTH: usize = 12;
97
98/// Applies a masking strategy to text using the supplied findings.
99///
100/// Findings are deduplicated and then processed in descending span order so
101/// earlier byte offsets remain valid while the string is modified.
102///
103/// # Examples
104///
105/// ```
106/// use cloakrs_core::{apply_mask, Confidence, EntityType, MaskStrategy, PiiEntity, Span};
107///
108/// let finding = PiiEntity {
109///     entity_type: EntityType::Email,
110///     span: Span::new(8, 24),
111///     text: "user@example.com".to_string(),
112///     confidence: Confidence::new(0.95).unwrap(),
113///     recognizer_id: "email_regex_v1".to_string(),
114/// };
115///
116/// let masked = apply_mask("Contact user@example.com", &[finding], &MaskStrategy::Redact).unwrap();
117/// assert_eq!(masked, "Contact [EMAIL]");
118/// ```
119pub fn apply_mask(text: &str, findings: &[PiiEntity], strategy: &MaskStrategy) -> Result<String> {
120    let mut findings = deduplicate(findings);
121    findings.sort_by_key(|finding| std::cmp::Reverse(finding.span.start));
122
123    let mut result = text.to_string();
124    for finding in findings {
125        validate_span(text, &finding)?;
126        result.replace_range(
127            finding.span.start..finding.span.end,
128            &strategy.try_replacement(&finding)?,
129        );
130    }
131    Ok(result)
132}
133
134/// Decrypts a value produced by [`MaskStrategy::Encrypt`].
135///
136/// # Examples
137///
138/// ```
139/// use cloakrs_core::{apply_mask, decrypt_masked_value, Confidence, EntityType, MaskStrategy, PiiEntity, Span};
140///
141/// let key = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
142/// let finding = PiiEntity {
143///     entity_type: EntityType::Email,
144///     span: Span::new(0, 16),
145///     text: "user@example.com".to_string(),
146///     confidence: Confidence::new(0.95).unwrap(),
147///     recognizer_id: "email_regex_v1".to_string(),
148/// };
149///
150/// let encrypted = apply_mask("user@example.com", &[finding], &MaskStrategy::Encrypt { key: key.to_string() }).unwrap();
151/// assert_eq!(decrypt_masked_value(&encrypted, key).unwrap(), "user@example.com");
152/// ```
153pub fn decrypt_masked_value(value: &str, key: &str) -> Result<String> {
154    let Some(encoded) = value
155        .strip_prefix("ENC[")
156        .and_then(|value| value.strip_suffix(']'))
157    else {
158        return Err(CloakError::EncryptionError(
159            "encrypted value must use ENC[...] format".to_string(),
160        ));
161    };
162
163    let bytes = BASE64_STANDARD
164        .decode(encoded)
165        .map_err(|error| CloakError::EncryptionError(error.to_string()))?;
166    if bytes.len() <= NONCE_LENGTH {
167        return Err(CloakError::EncryptionError(
168            "encrypted payload is too short".to_string(),
169        ));
170    }
171
172    let key = parse_hex_key(key)?;
173    let cipher = Aes256Gcm::new_from_slice(&key)
174        .map_err(|error| CloakError::EncryptionError(error.to_string()))?;
175    let nonce = Nonce::from_slice(&bytes[..NONCE_LENGTH]);
176    let plaintext = cipher
177        .decrypt(nonce, &bytes[NONCE_LENGTH..])
178        .map_err(|error| CloakError::EncryptionError(error.to_string()))?;
179
180    String::from_utf8(plaintext).map_err(|error| CloakError::EncryptionError(error.to_string()))
181}
182
183/// Deduplicates overlapping findings.
184#[must_use]
185pub fn deduplicate(findings: &[PiiEntity]) -> Vec<PiiEntity> {
186    let mut sorted = findings.to_vec();
187    sorted.sort_by_key(|finding| (finding.span.start, std::cmp::Reverse(finding.span.end)));
188
189    let mut keep: Vec<PiiEntity> = Vec::with_capacity(sorted.len());
190    for finding in sorted {
191        if let Some(last) = keep.last_mut() {
192            if finding.span.overlaps(last.span) {
193                let merged = merge_overlapping(last, &finding);
194                *last = merged;
195                continue;
196            }
197        }
198        keep.push(finding);
199    }
200
201    keep
202}
203
204fn merge_overlapping(left: &PiiEntity, right: &PiiEntity) -> PiiEntity {
205    if left.span.start == right.span.start && left.span.end == right.span.end {
206        return if right.confidence > left.confidence {
207            right.clone()
208        } else {
209            left.clone()
210        };
211    }
212
213    if right.span.len() > left.span.len()
214        || (right.span.len() == left.span.len() && right.confidence > left.confidence)
215    {
216        right.clone()
217    } else {
218        left.clone()
219    }
220}
221
222fn validate_span(text: &str, finding: &PiiEntity) -> Result<()> {
223    let start = finding.span.start;
224    let end = finding.span.end;
225    if start <= end
226        && end <= text.len()
227        && text.is_char_boundary(start)
228        && text.is_char_boundary(end)
229    {
230        Ok(())
231    } else {
232        Err(CloakError::InvalidSpan {
233            start,
234            end,
235            len: text.len(),
236        })
237    }
238}
239
240fn partial_mask(
241    finding: &PiiEntity,
242    reveal_prefix: usize,
243    reveal_suffix: usize,
244    mask_char: char,
245) -> String {
246    match finding.entity_type {
247        EntityType::Email => mask_email(&finding.text, mask_char),
248        EntityType::CreditCard => mask_preserving_separators(&finding.text, 0, 4, mask_char),
249        EntityType::PhoneNumber => mask_phone(&finding.text, mask_char),
250        EntityType::Ssn => mask_preserving_separators(&finding.text, 0, 4, mask_char),
251        EntityType::Iban => mask_preserving_separators(&finding.text, 2, 4, mask_char),
252        EntityType::IpAddress => mask_ip(&finding.text),
253        EntityType::Hostname => mask_hostname(&finding.text, mask_char),
254        EntityType::UserPath => mask_user_path(&finding.text, mask_char),
255        EntityType::Bsn => mask_preserving_separators(&finding.text, 0, 3, mask_char),
256        EntityType::Aadhaar => mask_preserving_separators(&finding.text, 0, 4, mask_char),
257        EntityType::Jwt => mask_jwt(&finding.text),
258        EntityType::ApiKey | EntityType::AwsAccessKey => {
259            mask_generic(&finding.text, 4, 4, mask_char)
260        }
261        _ => mask_generic(&finding.text, reveal_prefix, reveal_suffix, mask_char),
262    }
263}
264
265fn hash_mask(finding: &PiiEntity, salt: Option<&str>, length: usize) -> String {
266    hash_value(&finding.text, salt, length)
267}
268
269fn hash_value(value: &str, salt: Option<&str>, length: usize) -> String {
270    let length = length.clamp(MIN_HASH_LENGTH, MAX_HASH_LENGTH);
271    let mut hasher = Sha256::new();
272    if let Some(salt) = salt {
273        hasher.update(salt.as_bytes());
274    }
275    hasher.update(value.as_bytes());
276    let digest = hasher.finalize();
277    let hex = to_hex(&digest);
278    format!("HASH:{}", &hex[..length])
279}
280
281fn hash_user_path(path: &str, salt: Option<&str>) -> String {
282    let Some((range, username)) = user_path_username_range(path) else {
283        return hash_value(path, salt, DEFAULT_HASH_LENGTH);
284    };
285    replace_range_owned(
286        path,
287        range,
288        &hash_value(username, salt, DEFAULT_HASH_LENGTH),
289    )
290}
291
292fn replace_mask(finding: &PiiEntity) -> String {
293    let seed = deterministic_seed(&finding.text);
294    match finding.entity_type {
295        EntityType::Email => format!("user{}@example.test", seed % 10_000),
296        EntityType::PhoneNumber => format!("+1 555 010 {:04}", seed % 10_000),
297        EntityType::CreditCard => fake_card(seed),
298        EntityType::Ssn => format!(
299            "9{:02}-{:02}-{:04}",
300            seed % 100,
301            (seed / 100) % 100,
302            (seed / 10_000) % 10_000
303        ),
304        EntityType::Iban => fake_iban(&finding.text),
305        EntityType::Bsn => format!("99999{:04}", seed % 10_000),
306        EntityType::Aadhaar => format!("9999 9999 {:04}", seed % 10_000),
307        EntityType::IpAddress => "192.0.2.1".to_string(),
308        EntityType::Url => "https://example.test/redacted".to_string(),
309        EntityType::Jwt => "eyJhbGciOiJIUzI1NiJ9.[REPLACED].signature".to_string(),
310        EntityType::ApiKey => format!("test_key_{:016x}", seed),
311        EntityType::AwsAccessKey => "AKIAIOSFODNN7EXAMPLE".to_string(),
312        EntityType::CryptoAddress => "0x0000000000000000000000000000000000000000".to_string(),
313        EntityType::MacAddress => "02:00:00:00:00:01".to_string(),
314        EntityType::Hostname => "host.example.test".to_string(),
315        EntityType::UserPath => "/home/user/redacted".to_string(),
316        _ => finding
317            .entity_type
318            .redaction_tag()
319            .replace('[', "[REPLACED_"),
320    }
321}
322
/// Selects one of four hard-coded sample card numbers based on `seed`.
fn fake_card(seed: u64) -> String {
    const SAMPLE_CARDS: [&str; 4] = [
        "4111111111111111",
        "5555555555554444",
        "378282246310005",
        "6011111111111117",
    ];
    let slot = (seed as usize) % SAMPLE_CARDS.len();
    String::from(SAMPLE_CARDS[slot])
}
332
/// Returns a sample IBAN chosen by the country code of `original`.
///
/// The first two bytes of `original` are compared case-insensitively with
/// `DE`, `GB` and `FR`; anything else (including input shorter than two
/// bytes) maps to the `NL` sample.
fn fake_iban(original: &str) -> String {
    const SAMPLES: [(&str, &str); 3] = [
        ("DE", "DE89 3704 0044 0532 0130 00"),
        ("GB", "GB29 NWBK 6016 1331 9268 19"),
        ("FR", "FR14 2004 1010 0505 0001 3M02 606"),
    ];
    const FALLBACK: &str = "NL91 ABNA 0417 1643 00";

    let country = original.get(..2);
    SAMPLES
        .iter()
        .find(|(code, _)| country.map_or(false, |prefix| prefix.eq_ignore_ascii_case(code)))
        .map_or(FALLBACK, |&(_, sample)| sample)
        .to_string()
}
341
342fn encrypt_mask(finding: &PiiEntity, key: &str) -> Result<String> {
343    let key = parse_hex_key(key)?;
344    let cipher = Aes256Gcm::new_from_slice(&key)
345        .map_err(|error| CloakError::EncryptionError(error.to_string()))?;
346    let nonce_bytes = derive_nonce(finding);
347    let nonce = Nonce::from_slice(&nonce_bytes);
348    let ciphertext = cipher
349        .encrypt(nonce, finding.text.as_bytes())
350        .map_err(|error| CloakError::EncryptionError(error.to_string()))?;
351
352    let mut payload = Vec::with_capacity(NONCE_LENGTH + ciphertext.len());
353    payload.extend_from_slice(&nonce_bytes);
354    payload.extend_from_slice(&ciphertext);
355    Ok(format!("ENC[{}]", BASE64_STANDARD.encode(payload)))
356}
357
358fn parse_hex_key(key: &str) -> Result<[u8; 32]> {
359    if key.len() != 64 {
360        return Err(CloakError::EncryptionError(
361            "encryption key must be 32 bytes encoded as 64 hex characters".to_string(),
362        ));
363    }
364
365    let mut bytes = [0u8; 32];
366    for (index, chunk) in key.as_bytes().chunks_exact(2).enumerate() {
367        let high = hex_nibble(chunk[0])?;
368        let low = hex_nibble(chunk[1])?;
369        bytes[index] = (high << 4) | low;
370    }
371    Ok(bytes)
372}
373
374fn hex_nibble(byte: u8) -> Result<u8> {
375    match byte {
376        b'0'..=b'9' => Ok(byte - b'0'),
377        b'a'..=b'f' => Ok(byte - b'a' + 10),
378        b'A'..=b'F' => Ok(byte - b'A' + 10),
379        _ => Err(CloakError::EncryptionError(
380            "encryption key must contain only hex characters".to_string(),
381        )),
382    }
383}
384
385fn derive_nonce(finding: &PiiEntity) -> [u8; NONCE_LENGTH] {
386    let mut hasher = Sha256::new();
387    hasher.update(finding.entity_type.redaction_tag().as_bytes());
388    hasher.update(finding.span.start.to_le_bytes());
389    hasher.update(finding.span.end.to_le_bytes());
390    hasher.update(finding.recognizer_id.as_bytes());
391    hasher.update(finding.text.as_bytes());
392    let digest = hasher.finalize();
393    let mut nonce = [0u8; NONCE_LENGTH];
394    nonce.copy_from_slice(&digest[..NONCE_LENGTH]);
395    nonce
396}
397
398fn deterministic_seed(value: &str) -> u64 {
399    let digest = Sha256::digest(value.as_bytes());
400    let mut bytes = [0u8; 8];
401    bytes.copy_from_slice(&digest[..8]);
402    u64::from_le_bytes(bytes)
403}
404
/// Encodes `bytes` as lowercase hexadecimal, two characters per byte.
fn to_hex(bytes: &[u8]) -> String {
    use std::fmt::Write as _;

    bytes
        .iter()
        .fold(String::with_capacity(bytes.len() * 2), |mut encoded, byte| {
            // Formatting into a `String` cannot fail.
            let _ = write!(encoded, "{byte:02x}");
            encoded
        })
}
414
415fn mask_email(email: &str, mask_char: char) -> String {
416    let Some((local, domain)) = email.split_once('@') else {
417        return mask_generic(email, 0, 0, mask_char);
418    };
419
420    if local.is_empty() {
421        return format!("{mask_char}@{domain}");
422    }
423
424    let mut chars = local.chars();
425    let Some(first) = chars.next() else {
426        return format!("{mask_char}@{domain}");
427    };
428    let masked_count = chars.count();
429    format!(
430        "{first}{}@{domain}",
431        mask_char.to_string().repeat(masked_count)
432    )
433}
434
/// Masks the ASCII letters and digits of `value`, leaving every other
/// character (separators, spaces, punctuation) untouched.
///
/// The first `reveal_prefix` and the last `reveal_suffix` ASCII alphanumeric
/// characters are kept; positions are counted over alphanumerics only.
fn mask_preserving_separators(
    value: &str,
    reveal_prefix: usize,
    reveal_suffix: usize,
    mask_char: char,
) -> String {
    let total = value.chars().filter(char::is_ascii_alphanumeric).count();
    // Alphanumerics with a 1-based position above this threshold stay visible.
    let suffix_threshold = total.saturating_sub(reveal_suffix);

    let mut seen = 0usize;
    let mut masked = String::with_capacity(value.len());
    for c in value.chars() {
        if !c.is_ascii_alphanumeric() {
            masked.push(c);
            continue;
        }
        seen += 1;
        let revealed = seen <= reveal_prefix || seen > suffix_threshold;
        masked.push(if revealed { c } else { mask_char });
    }
    masked
}
462
463fn mask_phone(phone: &str, mask_char: char) -> String {
464    let reveal_prefix = usize::from(phone.starts_with('+')) * 2;
465    mask_preserving_separators(phone, reveal_prefix, 2, mask_char)
466}
467
/// Masks an IP address, keeping the first two parts of a dotted quad.
///
/// Input that does not split into exactly four `.`-separated parts is
/// replaced by one `*` per character.
fn mask_ip(ip: &str) -> String {
    let mut parts = ip.split('.');
    let quad = (
        parts.next(),
        parts.next(),
        parts.next(),
        parts.next(),
        parts.next(),
    );
    match quad {
        (Some(first), Some(second), Some(_), Some(_), None) => format!("{first}.{second}.*.*"),
        _ => "*".repeat(ip.chars().count()),
    }
}
476
477fn mask_hostname(hostname: &str, mask_char: char) -> String {
478    let labels: Vec<&str> = hostname.split('.').collect();
479    if labels.len() < 2 {
480        return mask_generic(hostname, 0, 0, mask_char);
481    }
482
483    let last_index = labels.len() - 1;
484    labels
485        .iter()
486        .enumerate()
487        .map(|(index, label)| {
488            if index == last_index || is_preserved_hostname_label(label) {
489                (*label).to_string()
490            } else if index == 0 {
491                mask_hostname_first_label(label, mask_char)
492            } else {
493                mask_preserving_label_separators(label, mask_char)
494            }
495        })
496        .collect::<Vec<_>>()
497        .join(".")
498}
499
/// Reports whether `label` is one of the hostname labels that are never
/// masked (`internal`, `local`, `lan`, `corp`, `private`, `intranet`),
/// compared ASCII case-insensitively.
fn is_preserved_hostname_label(label: &str) -> bool {
    const PRESERVED: [&str; 6] = ["internal", "local", "lan", "corp", "private", "intranet"];
    PRESERVED
        .iter()
        .any(|preserved| label.eq_ignore_ascii_case(preserved))
}
506
507fn mask_hostname_first_label(label: &str, mask_char: char) -> String {
508    if let Some((prefix, rest)) = label.split_once('-') {
509        if !prefix.is_empty() && !rest.is_empty() {
510            return format!(
511                "{prefix}-{}",
512                mask_preserving_label_separators(rest, mask_char)
513            );
514        }
515    }
516    mask_generic(label, 2.min(label.chars().count()), 0, mask_char)
517}
518
/// Replaces every ASCII letter or digit of `label` with `mask_char`, leaving
/// all other characters in place.
fn mask_preserving_label_separators(label: &str, mask_char: char) -> String {
    let mut masked = String::with_capacity(label.len());
    for c in label.chars() {
        masked.push(if c.is_ascii_alphanumeric() { mask_char } else { c });
    }
    masked
}
531
532fn mask_user_path(path: &str, mask_char: char) -> String {
533    let Some((range, username)) = user_path_username_range(path) else {
534        return mask_generic(path, 0, 0, mask_char);
535    };
536    replace_range_owned(
537        path,
538        range,
539        &mask_char.to_string().repeat(username.chars().count()),
540    )
541}
542
543fn user_path_username_range(path: &str) -> Option<(std::ops::Range<usize>, &str)> {
544    if let Some(rest) = path.strip_prefix("/home/") {
545        return username_range_after_prefix(path, "/home/".len(), rest, '/');
546    }
547    if let Some(rest) = path.strip_prefix("/Users/") {
548        return username_range_after_prefix(path, "/Users/".len(), rest, '/');
549    }
550    let lower = path.to_ascii_lowercase();
551    if let Some(index) = lower.find(r"\users\") {
552        let prefix_end = index + r"\Users\".len();
553        return username_range_after_prefix(path, prefix_end, &path[prefix_end..], '\\');
554    }
555    if path == "/root" || path.starts_with("/root/") {
556        return Some((1..5, &path[1..5]));
557    }
558    None
559}
560
/// Computes the username range given the text that follows a home prefix.
///
/// `rest` is the text following the prefix and `prefix_end` is the byte
/// offset in `path` at which it starts. The username runs up to the first
/// `separator` in `rest`, or to the end of `rest`. Returns `None` for an
/// empty username.
fn username_range_after_prefix<'a>(
    path: &'a str,
    prefix_end: usize,
    rest: &'a str,
    separator: char,
) -> Option<(std::ops::Range<usize>, &'a str)> {
    let username_len = match rest.find(separator) {
        Some(offset) => offset,
        None => rest.len(),
    };
    if username_len == 0 {
        return None;
    }

    let range = prefix_end..prefix_end + username_len;
    let username = &path[range.clone()];
    Some((range, username))
}
574
/// Returns a copy of `value` in which the bytes in `range` are replaced by
/// `replacement`.
fn replace_range_owned(value: &str, range: std::ops::Range<usize>, replacement: &str) -> String {
    let mut owned = String::with_capacity(value.len() + replacement.len());
    owned.push_str(value);
    owned.replace_range(range, replacement);
    owned
}
580
/// Keeps the first ten characters of a JWT and appends `[TRUNCATED]`.
fn mask_jwt(jwt: &str) -> String {
    const VISIBLE_CHARS: usize = 10;
    const MARKER: &str = "[TRUNCATED]";

    // Byte offset just past the tenth character, or the whole string if shorter.
    let cut = jwt
        .char_indices()
        .nth(VISIBLE_CHARS)
        .map_or(jwt.len(), |(offset, _)| offset);

    let mut masked = String::with_capacity(cut + MARKER.len());
    masked.push_str(&jwt[..cut]);
    masked.push_str(MARKER);
    masked
}
585
/// Masks the middle of `value`, keeping `reveal_prefix` leading and
/// `reveal_suffix` trailing characters.
///
/// Counts are in characters, not bytes. The prefix is clamped to the length
/// of the value and the suffix to whatever remains after the prefix, so the
/// two never overlap.
fn mask_generic(
    value: &str,
    reveal_prefix: usize,
    reveal_suffix: usize,
    mask_char: char,
) -> String {
    let total = value.chars().count();
    let head = reveal_prefix.min(total);
    let tail = reveal_suffix.min(total - head);
    let tail_start = total - tail;

    value
        .chars()
        .enumerate()
        .map(|(index, c)| {
            if index < head || index >= tail_start {
                c
            } else {
                mask_char
            }
        })
        .collect()
}
604
/// Unit tests for the masking strategies.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Confidence, Span};

    /// Builds a finding with a fixed confidence and recognizer id.
    fn finding(entity_type: EntityType, start: usize, end: usize, text: &str) -> PiiEntity {
        PiiEntity {
            entity_type,
            span: Span::new(start, end),
            text: text.to_string(),
            confidence: Confidence::new(0.9).unwrap(),
            recognizer_id: "test_v1".to_string(),
        }
    }

    #[test]
    fn test_apply_mask_redact_replaces_pii_with_tag() {
        let text = "Email user@example.com now";
        let findings = [finding(EntityType::Email, 6, 22, "user@example.com")];
        let masked = apply_mask(text, &findings, &MaskStrategy::Redact).unwrap();
        assert_eq!(masked, "Email [EMAIL] now");
    }

    #[test]
    fn test_apply_mask_uses_reverse_span_order() {
        let text = "a@b.co and c@d.co";
        let findings = [
            finding(EntityType::Email, 0, 6, "a@b.co"),
            finding(EntityType::Email, 11, 17, "c@d.co"),
        ];
        let masked = apply_mask(text, &findings, &MaskStrategy::Redact).unwrap();
        assert_eq!(masked, "[EMAIL] and [EMAIL]");
    }

    #[test]
    fn test_partial_mask_email_preserves_domain() {
        let item = finding(EntityType::Email, 0, 16, "john@example.com");
        let masked = MaskStrategy::PartialMask {
            reveal_prefix: 0,
            reveal_suffix: 0,
            mask_char: '*',
        }
        .replacement(&item);
        assert_eq!(masked, "j***@example.com");
    }

    #[test]
    fn test_partial_mask_credit_card_preserves_separators() {
        let item = finding(EntityType::CreditCard, 0, 19, "4111-1111-1111-1111");
        let masked = MaskStrategy::PartialMask {
            reveal_prefix: 0,
            reveal_suffix: 4,
            mask_char: '*',
        }
        .replacement(&item);
        assert_eq!(masked, "****-****-****-1111");
    }

    #[test]
    fn test_partial_mask_hostname_preserves_structure() {
        let item = finding(
            EntityType::Hostname,
            0,
            31,
            "db-prod-01.internal.company.com",
        );
        let masked = MaskStrategy::PartialMask {
            reveal_prefix: 0,
            reveal_suffix: 0,
            mask_char: '*',
        }
        .replacement(&item);
        assert_eq!(masked, "db-****-**.internal.*******.com");
    }

    #[test]
    fn test_partial_mask_user_path_masks_username_only() {
        let item = finding(EntityType::UserPath, 0, 24, "/home/kadir/projects/app");
        let masked = MaskStrategy::PartialMask {
            reveal_prefix: 0,
            reveal_suffix: 0,
            mask_char: '*',
        }
        .replacement(&item);
        assert_eq!(masked, "/home/*****/projects/app");
    }

    #[test]
    fn test_partial_mask_windows_user_path_masks_username_only() {
        let item = finding(EntityType::UserPath, 0, 25, r"C:\Users\john.doe\Desktop");
        let masked = MaskStrategy::PartialMask {
            reveal_prefix: 0,
            reveal_suffix: 0,
            mask_char: '*',
        }
        .replacement(&item);
        assert_eq!(masked, r"C:\Users\********\Desktop");
    }

    #[test]
    fn test_apply_mask_invalid_span_returns_error() {
        let text = "short";
        let findings = [finding(EntityType::Email, 0, 99, "short")];
        assert!(apply_mask(text, &findings, &MaskStrategy::Redact).is_err());
    }

    #[test]
    fn test_hash_mask_is_deterministic_without_salt() {
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        let strategy = MaskStrategy::Hash { salt: None };
        assert_eq!(strategy.replacement(&item), strategy.replacement(&item));
    }

    #[test]
    fn test_hash_mask_uses_expected_prefix_and_default_length() {
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        let replacement = MaskStrategy::Hash { salt: None }.replacement(&item);
        assert!(replacement.starts_with("HASH:"));
        assert_eq!(replacement.len(), "HASH:".len() + DEFAULT_HASH_LENGTH);
    }

    #[test]
    fn test_hash_mask_salt_changes_output() {
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        let without_salt = MaskStrategy::Hash { salt: None }.replacement(&item);
        let with_salt = MaskStrategy::Hash {
            salt: Some("prod".to_string()),
        }
        .replacement(&item);
        assert_ne!(without_salt, with_salt);
    }

    #[test]
    fn test_hash_mask_user_path_hashes_username_only() {
        let item = finding(EntityType::UserPath, 0, 24, "/home/kadir/projects/app");
        let replacement = MaskStrategy::Hash { salt: None }.replacement(&item);
        assert!(replacement.starts_with("/home/HASH:"));
        assert!(replacement.ends_with("/projects/app"));
    }

    #[test]
    fn test_hash_mask_length_is_clamped_to_bounds() {
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        assert_eq!(
            hash_mask(&item, None, 2).len(),
            "HASH:".len() + MIN_HASH_LENGTH
        );
        assert_eq!(
            hash_mask(&item, None, 128).len(),
            "HASH:".len() + MAX_HASH_LENGTH
        );
    }

    #[test]
    fn test_replace_email_uses_example_test_domain() {
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        let replacement = MaskStrategy::Replace.replacement(&item);
        assert!(replacement.ends_with("@example.test"));
    }

    #[test]
    fn test_replace_is_deterministic_for_same_input() {
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        assert_eq!(
            MaskStrategy::Replace.replacement(&item),
            MaskStrategy::Replace.replacement(&item)
        );
    }

    #[test]
    fn test_replace_credit_card_uses_luhn_valid_test_number() {
        let item = finding(EntityType::CreditCard, 0, 16, "4111111111111111");
        let replacement = MaskStrategy::Replace.replacement(&item);
        assert!(luhn_valid_digits(&replacement));
    }

    #[test]
    fn test_replace_ssn_uses_reserved_area_range() {
        let item = finding(EntityType::Ssn, 0, 11, "123-45-6789");
        let replacement = MaskStrategy::Replace.replacement(&item);
        assert!(replacement.starts_with('9'));
    }

    #[test]
    fn test_encrypt_mask_round_trips() {
        let key = test_key();
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        let encrypted = MaskStrategy::Encrypt { key: key.clone() }
            .try_replacement(&item)
            .unwrap();
        assert!(encrypted.starts_with("ENC["));
        assert_eq!(
            decrypt_masked_value(&encrypted, &key).unwrap(),
            "user@example.com"
        );
    }

    #[test]
    fn test_encrypt_mask_is_deterministic_for_same_finding() {
        let key = test_key();
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        let strategy = MaskStrategy::Encrypt { key };
        assert_eq!(
            strategy.try_replacement(&item).unwrap(),
            strategy.try_replacement(&item).unwrap()
        );
    }

    #[test]
    fn test_encrypt_mask_invalid_key_returns_error() {
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        assert!(MaskStrategy::Encrypt {
            key: "short".to_string()
        }
        .try_replacement(&item)
        .is_err());
    }

    #[test]
    fn test_decrypt_masked_value_wrong_key_fails() {
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        let encrypted = MaskStrategy::Encrypt { key: test_key() }
            .try_replacement(&item)
            .unwrap();
        assert!(decrypt_masked_value(
            &encrypted,
            "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
        )
        .is_err());
    }

    #[test]
    fn test_decrypt_masked_value_tampered_ciphertext_fails() {
        let item = finding(EntityType::Email, 0, 16, "user@example.com");
        let encrypted = MaskStrategy::Encrypt { key: test_key() }
            .try_replacement(&item)
            .unwrap();

        // Swap exactly one Base64 character for a different valid one, so the
        // payload is guaranteed to change whatever the ciphertext looks like.
        // The payload is a 12-byte nonce followed by the ciphertext; Base64
        // character 20 encodes part of byte 15, i.e. ciphertext.
        let index = "ENC[".len() + 20;
        let mut tampered = encrypted.into_bytes();
        tampered[index] = if tampered[index] == b'A' { b'B' } else { b'A' };
        let tampered = String::from_utf8(tampered).unwrap();

        assert!(decrypt_masked_value(&tampered, &test_key()).is_err());
    }

    /// A fixed 32-byte key encoded as 64 hex characters.
    fn test_key() -> String {
        "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f".to_string()
    }

    /// Runs the Luhn checksum over the decimal digits found in `value`.
    fn luhn_valid_digits(value: &str) -> bool {
        let digits: Vec<u32> = value.chars().filter_map(|c| c.to_digit(10)).collect();
        let mut sum = 0u32;
        let mut double = false;
        for digit in digits.iter().rev() {
            let mut value = *digit;
            if double {
                value *= 2;
                if value > 9 {
                    value -= 9;
                }
            }
            sum += value;
            double = !double;
        }
        sum % 10 == 0
    }
}