pii 0.1.0

PII detection and anonymization with deterministic, capability-aware NLP pipelines.
Documentation
//! NER-backed recognizer that converts model spans into detections.
//!
//! This recognizer consumes `NlpArtifacts.ner` and emits one detection per
//! span, carrying the model-provided score and optionally remapping the
//! span's entity type. It respects capability flags, returning no detections
//! when `NlpArtifacts.capabilities.ner` is false.

use crate::recognizers::Recognizer;
use crate::types::{Detection, DetectionExplanation, EntityType, NlpArtifacts};

/// Wraps NER spans from `NlpArtifacts` into detections.
///
/// Each span yields one detection. The span's entity type is replaced by the
/// mapped type when `entity_map` has an entry for it, and kept as-is otherwise.
#[derive(Clone, Debug)]
pub struct NerRecognizer {
    /// Recognizer name; returned by `name()` and copied into the `recognizer`
    /// field of every detection this recognizer emits.
    name: String,
    /// `(from, to)` entity-type pairs, consulted in order. The first pair whose
    /// `from` equals a span's entity type supplies the replacement type.
    entity_map: Vec<(EntityType, EntityType)>,
}

impl NerRecognizer {
    /// Builds a recognizer identified by `name`.
    ///
    /// `entity_map` holds `(from, to)` pairs used to rewrite the entity type
    /// reported by the model; pass an empty vector to keep every type as-is.
    pub fn new(name: impl Into<String>, entity_map: Vec<(EntityType, EntityType)>) -> Self {
        let name = name.into();
        Self { name, entity_map }
    }

    /// Returns the replacement type configured for `entity`, if any.
    ///
    /// Pairs are scanned in insertion order and the first pair whose source
    /// type equals `entity` wins; `None` means no remapping is configured.
    fn map_entity(&self, entity: &EntityType) -> Option<EntityType> {
        for (source, target) in &self.entity_map {
            if source == entity {
                return Some(target.clone());
            }
        }
        None
    }
}

impl Recognizer for NerRecognizer {
    /// Returns the name supplied at construction time.
    fn name(&self) -> &str {
        &self.name
    }

    /// Always returns an empty slice.
    ///
    /// NOTE(review): the entity types this recognizer can emit depend on the
    /// NER model and on `entity_map`, neither of which is reflected here.
    /// Confirm how callers interpret an empty list (e.g. "no restriction"
    /// versus "supports nothing").
    fn supported_entities(&self) -> &[EntityType] {
        &[]
    }

    /// Converts every NER span in `artifacts` into a [`Detection`].
    ///
    /// Returns an empty vector when `artifacts.capabilities.ner` is false.
    /// Otherwise exactly one detection is produced per span, in span order:
    /// offsets and score are copied from the span, the entity type is remapped
    /// through `entity_map` when a mapping exists, and the explanation records
    /// the originating model together with its raw score. The input text is
    /// not inspected.
    fn analyze(&self, _text: &str, artifacts: &NlpArtifacts) -> Vec<Detection> {
        if !artifacts.capabilities.ner {
            return Vec::new();
        }
        // Every span becomes a detection, so a plain `map` is used rather than
        // a `filter_map` that never filters; this also keeps the iterator's
        // exact size hint available to `collect`.
        artifacts
            .ner
            .iter()
            .map(|span| Detection {
                // Fall back to the model's own label when no remapping applies.
                entity_type: self
                    .map_entity(&span.entity_type)
                    .unwrap_or_else(|| span.entity_type.clone()),
                start: span.start,
                end: span.end,
                score: span.score,
                recognizer: self.name.clone(),
                explanation: DetectionExplanation::Ner {
                    model: span.model.clone(),
                    raw_score: span.score,
                },
            })
            .collect()
    }
}