// gaze-pii 0.7.0
//
// Reversible PII pseudonymization runtime for agentic workflows.
// Documentation
use unicode_normalization::UnicodeNormalization;

/// Normalized text together with a byte-level map back to the original input.
///
/// The derives let callers debug-print, clone, and compare results directly
/// (for example in tests) without reaching into the fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NormalizedText {
    /// The normalized text.
    pub text: String,
    /// Byte ranges as `(start, end)` pairs of `usize` offsets.
    // NOTE(review): presumably one entry per byte of `text`, pointing at the
    // originating range in the un-normalized input — confirm against the
    // code that builds this value.
    pub spans: Vec<(usize, usize)>,
}

/// Normalizes `input` character by character and records where each output
/// byte came from.
///
/// For every character of `input`, in order:
/// - U+200C and U+200D are dropped and contribute nothing to the output;
/// - the character is passed through `fullwidth_to_ascii`;
/// - the mapped character is NFC-normalized on its own (neighbouring
///   characters are not considered) and appended to the output text.
///
/// The returned `spans` vector receives one `(start, end)` entry per byte
/// appended to `text`, where `start..end` is the byte range of the source
/// character in `input`.
pub fn normalize(input: &str) -> NormalizedText {
    let mut out = NormalizedText {
        text: String::new(),
        spans: Vec::new(),
    };

    // Skip the two joiner code points up front so the loop body only deals
    // with characters that actually produce output.
    let kept = input
        .char_indices()
        .filter(|&(_, c)| c != '\u{200C}' && c != '\u{200D}');

    for (offset, original) in kept {
        let source_span = (offset, offset + original.len_utf8());
        let piece: String = fullwidth_to_ascii(original).to_string().nfc().collect();

        // One span entry per output byte, all pointing at the same source char.
        out.spans
            .extend(std::iter::repeat(source_span).take(piece.len()));
        out.text.push_str(&piece);
    }

    out
}

/// Maps fullwidth forms onto their ASCII counterparts.
///
/// - U+3000 becomes an ASCII space.
/// - U+FF01..=U+FF5E are shifted down by `0xFEE0`, landing on `!`..=`~`.
/// - Every other character is returned unchanged.
fn fullwidth_to_ascii(ch: char) -> char {
    const WIDE_SPACE: char = '\u{3000}';
    const FULLWIDTH_FIRST: u32 = 0xFF01;
    const FULLWIDTH_LAST: u32 = 0xFF5E;
    const FULLWIDTH_OFFSET: u32 = 0xFEE0;

    if ch == WIDE_SPACE {
        return ' ';
    }

    let code = u32::from(ch);
    if (FULLWIDTH_FIRST..=FULLWIDTH_LAST).contains(&code) {
        // The shifted value is always a valid scalar here; fall back to the
        // input rather than panic if that ever stops being true.
        char::from_u32(code - FULLWIDTH_OFFSET).unwrap_or(ch)
    } else {
        ch
    }
}