pii 0.1.0

PII detection and anonymization with deterministic, capability-aware NLP pipelines.
Documentation
//! Language profiles and context term configuration.
//!
//! Language profiles let you define per-language capabilities and
//! context terms. This supports controlled degradation: if a language
//! lacks lemma or NER support, the profile can report that explicitly
//! so enhancers and recognizers behave predictably.
//!
//! Profiles are optional. You can run with a single language-agnostic
//! pipeline and still get deterministic results for regex/validator
//! recognizers.

use crate::capabilities::Capabilities;
use crate::types::{EntityType, Language};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Context terms used to boost detections for an entity type.
///
/// Instances are stored per entity type in `LanguageProfile::context`.
/// The type derives `Serialize`/`Deserialize`, so it can be round-tripped
/// through any serde data format.
// NOTE(review): how the window and boost are actually applied is decided by
// the consumer of this struct, which is not visible in this module.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ContextTerms {
    /// Number of tokens to scan on each side of a detection.
    pub window_tokens: usize,
    /// Score boost applied when any terms match.
    // NOTE(review): valid range and whether the boost is additive or
    // multiplicative are not established here — confirm against the enhancer.
    pub boost: f32,
    /// Terms to match (case-insensitive).
    // NOTE(review): case-insensitivity is a contract for the matcher; nothing
    // in this struct normalizes the stored strings.
    pub terms: Vec<String>,
}

/// Defines capabilities and context terms for a language.
///
/// Build one with `LanguageProfile::new` and attach per-entity context
/// terms with `LanguageProfile::with_context`. All fields are public, so a
/// profile can also be constructed or edited directly. The type derives
/// `Serialize`/`Deserialize` for use with serde data formats.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LanguageProfile {
    /// Language code (e.g., "en").
    pub language: Language,
    /// Capabilities available for this language.
    pub capabilities: Capabilities,
    /// Entity-specific context term lists.
    ///
    /// Holds at most one `ContextTerms` value per `EntityType` key.
    pub context: HashMap<EntityType, ContextTerms>,
}

impl LanguageProfile {
    /// Builds a profile for `language` with the given `capabilities`.
    ///
    /// The context map starts out empty; per-entity terms can be attached
    /// afterwards with `with_context`.
    pub fn new(language: Language, capabilities: Capabilities) -> Self {
        let context = HashMap::new();
        Self {
            language,
            capabilities,
            context,
        }
    }

    /// Builder-style setter that stores `terms` under `entity`.
    ///
    /// Consumes the profile and returns it so calls can be chained. Terms
    /// already stored for the same `entity` are replaced.
    pub fn with_context(self, entity: EntityType, terms: ContextTerms) -> Self {
        let mut profile = self;
        profile.context.insert(entity, terms);
        profile
    }
}

/// Registry of language profiles keyed by language code.
///
/// Derives `Default`, which yields an empty registry. Profiles are added
/// with `insert` and looked up with `get`.
#[derive(Clone, Debug, Default)]
pub struct LanguageRegistry {
    // At most one profile per language; the key is the profile's own
    // `language` field (see `insert`).
    profiles: HashMap<Language, LanguageProfile>,
}

impl LanguageRegistry {
    /// Stores `profile` under its own language code.
    ///
    /// A profile previously registered for the same language is replaced;
    /// the displaced value is dropped.
    pub fn insert(&mut self, profile: LanguageProfile) {
        // The key must be an owned copy because `profile` itself moves into the map.
        let key = profile.language.clone();
        self.profiles.insert(key, profile);
    }

    /// Returns the profile registered for `language`, or `None` when no
    /// profile has been inserted for it.
    pub fn get(&self, language: &Language) -> Option<&LanguageProfile> {
        let Self { profiles } = self;
        profiles.get(language)
    }
}