//! pii 0.1.0
//!
//! PII detection and anonymization with deterministic, capability-aware NLP pipelines.
//!
//! Candle-backed NER adapter for the PII pipeline.
//!
//! This module lets you augment any base `NlpEngine` with Candle-powered
//! NER spans. The base engine is still responsible for token offsets and
//! sentence segmentation; the Candle model only supplies entity spans.
//!
//! Implement `CandleNerModel` to integrate a specific model. The adapter
//! will mark `capabilities.ner = true` when any spans are produced.

use crate::error::{PiiError, PiiResult};
use crate::nlp::NlpEngine;
use crate::types::{Language, NerSpan, NlpArtifacts};
use candle::Device;

/// Minimal interface for a Candle-based NER model.
pub trait CandleNerModel: Send + Sync {
    /// Returns a stable model identifier.
    fn model_name(&self) -> &str;
    /// Runs inference and returns labeled spans.
    fn infer(&self, device: &Device, text: &str, language: &Language) -> PiiResult<Vec<NerSpan>>;
}

/// Wraps a base NLP engine and augments it with Candle NER output.
pub struct CandleNerEngine {
    base: Box<dyn NlpEngine>,
    model: Box<dyn CandleNerModel>,
    device: Device,
}

impl CandleNerEngine {
    /// Creates a new Candle-backed NER engine (CPU by default).
    pub fn new(base: Box<dyn NlpEngine>, model: Box<dyn CandleNerModel>) -> PiiResult<Self> {
        let device = Device::Cpu;
        Ok(Self { base, model, device })
    }
}

impl NlpEngine for CandleNerEngine {
    /// Runs the base engine, then replaces its NER spans with Candle output.
    ///
    /// # Errors
    /// Propagates any error from the base engine's `analyze` or the Candle
    /// model's `infer` unchanged.
    fn analyze(&self, text: &str, language: &Language) -> PiiResult<NlpArtifacts> {
        // Base engine supplies token offsets, sentences, etc.; only the NER
        // spans are overridden below.
        let mut artifacts = self.base.analyze(text, language)?;
        // `infer` already returns `PiiResult`, so propagate its `PiiError`
        // directly with `?` instead of stringifying it into a fresh
        // `PiiError::NlpEngine` — the old re-wrap discarded the original
        // error variant and its source chain.
        artifacts.ner = self.model.infer(&self.device, text, language)?;
        // Advertise the NER capability only when spans were actually produced,
        // per the module contract.
        artifacts.capabilities.ner = !artifacts.ner.is_empty();
        Ok(artifacts)
    }
}