Skip to main content

cloakpipe_core/
replacer.rs

//! Consistent pseudonymization engine.
//!
//! Takes detected entities and replaces them with stable pseudo-tokens
//! using the vault for consistency across documents, queries, and sessions.
5
6use crate::{DetectedEntity, PseudonymizedText, vault::Vault};
7use anyhow::Result;
8use std::collections::HashMap;
9
10pub struct Replacer;
11
12impl Replacer {
13    /// Replace all detected entities in the text with pseudo-tokens.
14    /// Entities must be sorted by position (start offset) and non-overlapping.
15    pub fn pseudonymize(
16        text: &str,
17        entities: &[DetectedEntity],
18        vault: &mut Vault,
19    ) -> Result<PseudonymizedText> {
20        let mut result = String::with_capacity(text.len());
21        let mut mappings = HashMap::new();
22        let mut last_end = 0;
23
24        for entity in entities {
25            // Append text before this entity
26            if entity.start > last_end {
27                result.push_str(&text[last_end..entity.start]);
28            }
29
30            // Get or create a consistent pseudo-token
31            let token = vault.get_or_create(&entity.original, &entity.category);
32
33            // Record the mapping for rehydration
34            mappings.insert(token.token.clone(), entity.original.clone());
35
36            // Append the pseudo-token
37            result.push_str(&token.token);
38            last_end = entity.end;
39        }
40
41        // Append remaining text after last entity
42        if last_end < text.len() {
43            result.push_str(&text[last_end..]);
44        }
45
46        Ok(PseudonymizedText {
47            text: result,
48            mappings,
49            entities: entities.to_vec(),
50        })
51    }
52}