pii 0.1.0

PII detection and anonymization with deterministic, capability-aware NLP pipelines.
Documentation
//! Candidate resolution and deterministic overlap handling.
//!
//! Recognizers may emit overlapping spans (e.g., a credit card number could
//! also match a generic digit pattern). The decision layer applies thresholds
//! and resolves overlaps using a deterministic precedence order so results
//! remain stable across runs and platforms.
//!
//! The current precedence is:
//! - higher score wins (scores within a small tolerance count as a tie)
//! - validator-backed detections win ties
//! - longer spans win remaining ties
//! - earlier spans win as a final tie-breaker

use crate::types::{Detection, DetectionExplanation};

/// Tunable settings for the overlap-resolution decision layer.
#[derive(Debug, Clone)]
pub struct DecisionConfig {
    /// Absolute tolerance applied when comparing detection scores, so that
    /// tiny floating-point differences are not treated as meaningful.
    pub epsilon: f32,
}

impl Default for DecisionConfig {
    fn default() -> Self {
        Self { epsilon: 0.0001 }
    }
}

/// Filters candidates by threshold and collapses overlapping spans.
///
/// Candidates whose score is below the value returned by `thresholds` for
/// that candidate are discarded. The survivors are ordered by start offset
/// (then end offset) and swept left to right: each candidate is compared
/// with the most recently kept detection, and when the two overlap only the
/// preferred one is retained.
///
/// Returns the kept detections in ascending span order.
pub fn resolve(
    mut candidates: Vec<Detection>,
    thresholds: &dyn Fn(&Detection) -> f32,
) -> Vec<Detection> {
    // Drop everything that does not reach its own threshold.
    candidates.retain(|det| thresholds(det) <= det.score);

    // Stable sort: candidates with identical spans keep their input order.
    candidates.sort_by(|lhs, rhs| lhs.start.cmp(&rhs.start).then(lhs.end.cmp(&rhs.end)));

    let mut kept: Vec<Detection> = Vec::new();
    for incoming in candidates {
        // Only the most recently kept detection is checked for a clash.
        let clashes = matches!(kept.last(), Some(tail) if overlaps(tail, &incoming));
        if !clashes {
            kept.push(incoming);
            continue;
        }

        // Exactly one of the two overlapping detections survives.
        if let Some(tail) = kept.last_mut() {
            if prefer_candidate(tail, &incoming) {
                *tail = incoming;
            }
        }
    }

    kept
}

/// Reports whether two detections cover at least one common position.
///
/// Spans are treated as half-open: a span that merely touches the other at
/// its boundary does not count as overlapping.
fn overlaps(a: &Detection, b: &Detection) -> bool {
    // The spans are disjoint when one ends at or before the other begins.
    let disjoint = a.end <= b.start || b.end <= a.start;
    !disjoint
}

/// Chooses which candidate should win an overlap.
fn prefer_candidate(current: &Detection, challenger: &Detection) -> bool {
    if (current.score - challenger.score).abs() > 0.0001 {
        return challenger.score > current.score;
    }

    let current_validator = matches!(current.explanation, DetectionExplanation::Validator { .. });
    let challenger_validator = matches!(challenger.explanation, DetectionExplanation::Validator { .. });
    if current_validator != challenger_validator {
        return challenger_validator;
    }

    let current_len = current.end.saturating_sub(current.start);
    let challenger_len = challenger.end.saturating_sub(challenger.start);
    if current_len != challenger_len {
        return challenger_len > current_len;
    }

    challenger.start < current.start
}