pii 0.1.0

PII detection and anonymization with deterministic, capability-aware NLP pipelines.
Documentation
//! Policy configuration for entity filtering and thresholds.
//!
//! The policy layer controls what the analyzer returns. It lets you:
//! - enforce an allowlist of entity types
//! - set per-entity score thresholds
//! - define a default threshold for anything not explicitly configured
//!
//! This keeps policy separate from detection logic so you can reuse a single
//! pipeline across multiple products or tenants with different privacy rules.

use crate::types::EntityType;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};

/// Policy settings that control which entities are returned and their thresholds.
///
/// The struct only stores the rules; [`PolicyConfig::is_enabled`] and
/// [`PolicyConfig::threshold_for`] answer lookups against them. It derives
/// `Serialize`/`Deserialize`, so a policy can be stored or loaded with serde.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PolicyConfig {
    /// Explicit allowlist of entity types.
    ///
    /// An empty set does not mean "nothing is allowed": `is_enabled` treats it
    /// as "no allowlist configured" and reports every entity type as enabled.
    pub enabled_entities: HashSet<EntityType>,
    /// Per-entity score thresholds, keyed by entity type.
    ///
    /// An entry here takes precedence over `default_threshold` for that type.
    pub thresholds: HashMap<EntityType, f32>,
    /// Fallback threshold used when `thresholds` has no entry for an entity type.
    ///
    /// NOTE(review): the value is not validated here; presumably scores lie in
    /// `0.0..=1.0` — confirm against the analyzer that applies the threshold.
    pub default_threshold: f32,
}

impl Default for PolicyConfig {
    fn default() -> Self {
        Self {
            enabled_entities: HashSet::new(),
            thresholds: HashMap::new(),
            default_threshold: 0.5,
        }
    }
}

impl PolicyConfig {
    /// Looks up the score threshold that applies to `entity`.
    ///
    /// Returns the per-entity value from `thresholds` when one is configured,
    /// otherwise falls back to `default_threshold`.
    pub fn threshold_for(&self, entity: &EntityType) -> f32 {
        match self.thresholds.get(entity) {
            Some(&configured) => configured,
            None => self.default_threshold,
        }
    }

    /// Reports whether `entity` is permitted by the allowlist.
    ///
    /// An empty `enabled_entities` set means no allowlist is in effect, so
    /// every entity type is considered enabled. Otherwise the entity must be
    /// present in the set.
    pub fn is_enabled(&self, entity: &EntityType) -> bool {
        let allowlist = &self.enabled_entities;
        // Short-circuit: an empty allowlist enables everything.
        allowlist.is_empty() || allowlist.contains(entity)
    }
}