zer-cluster 1.0.1

Connected-components clustering and entity storage for the zer entity-resolution library
Documentation
use zer_core::scoring::{MatchBand, ModelParams, ScoredPair};

/// Pairs partitioned by their match band.
pub struct BandedPairs {
    pub auto_match:  Vec<ScoredPair>,
    pub borderline:  Vec<ScoredPair>,
    pub auto_reject: Vec<ScoredPair>,
}

/// Classify each pair by `match_probability` vs the upper/lower thresholds in
/// `params`. A pair is `AutoMatch` if `prob >= upper_threshold`, `AutoReject`
/// if `prob < lower_threshold`, and `Borderline` otherwise.
///
/// The band already stored in `ScoredPair::band` is used directly, it must
/// have been assigned by the same `ModelParams` that are passed here. If the
/// stored band disagrees with the thresholds (e.g., params were updated after
/// scoring), the stored band takes precedence so that provenance is preserved.
pub fn partition_by_band(pairs: Vec<ScoredPair>, _params: &ModelParams) -> BandedPairs {
    let mut auto_match  = Vec::new();
    let mut borderline  = Vec::new();
    let mut auto_reject = Vec::new();

    for pair in pairs {
        match pair.band {
            MatchBand::AutoMatch  => auto_match.push(pair),
            MatchBand::Borderline => borderline.push(pair),
            MatchBand::AutoReject => auto_reject.push(pair),
        }
    }

    BandedPairs { auto_match, borderline, auto_reject }
}

// ── Unit tests ────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use zer_core::{comparison::ComparisonVector, scoring::MatchBand};

    fn params() -> ModelParams {
        ModelParams {
            m: vec![],
            u: vec![],
            log_prior_odds: 0.0,
            upper_threshold: 0.8,
            lower_threshold: 0.2,
        }
    }

    fn pair(a: u64, b: u64, prob: f32, band: MatchBand) -> ScoredPair {
        ScoredPair {
            record_a:          a,
            record_b:          b,
            match_weight:      0.0,
            match_probability: prob,
            vector:            ComparisonVector { record_a: a, record_b: b, levels: vec![] },
            band,
        }
    }

    #[test]
    fn empty_input_returns_empty_partitions() {
        let result = partition_by_band(vec![], &params());
        assert!(result.auto_match.is_empty());
        assert!(result.borderline.is_empty());
        assert!(result.auto_reject.is_empty());
    }

    #[test]
    fn all_auto_match() {
        let pairs = vec![
            pair(1, 2, 0.95, MatchBand::AutoMatch),
            pair(3, 4, 0.90, MatchBand::AutoMatch),
        ];
        let result = partition_by_band(pairs, &params());
        assert_eq!(result.auto_match.len(), 2);
        assert!(result.borderline.is_empty());
        assert!(result.auto_reject.is_empty());
    }

    #[test]
    fn all_auto_reject() {
        let pairs = vec![
            pair(1, 2, 0.05, MatchBand::AutoReject),
            pair(3, 4, 0.10, MatchBand::AutoReject),
        ];
        let result = partition_by_band(pairs, &params());
        assert!(result.auto_match.is_empty());
        assert!(result.borderline.is_empty());
        assert_eq!(result.auto_reject.len(), 2);
    }

    #[test]
    fn mixed_bands() {
        let pairs = vec![
            pair(1, 2, 0.95, MatchBand::AutoMatch),
            pair(2, 3, 0.50, MatchBand::Borderline),
            pair(4, 5, 0.05, MatchBand::AutoReject),
        ];
        let result = partition_by_band(pairs, &params());
        assert_eq!(result.auto_match.len(), 1);
        assert_eq!(result.borderline.len(), 1);
        assert_eq!(result.auto_reject.len(), 1);
    }
}