use zer_core::{comparison::ComparisonLevel, schema::FieldKind};
#[derive(Debug, Clone)]
pub struct LevelThresholds {
pub exact: f32,
pub close: f32,
pub partial: f32,
}
impl LevelThresholds {
pub fn for_kind(kind: FieldKind) -> Self {
match kind {
FieldKind::Name => Self { exact: 0.92, close: 0.75, partial: 0.50 },
FieldKind::Date
| FieldKind::Timestamp => Self { exact: 0.99, close: 0.85, partial: 0.60 },
FieldKind::Phone => Self { exact: 0.98, close: 0.90, partial: 0.70 },
FieldKind::Address => Self { exact: 0.90, close: 0.70, partial: 0.40 },
FieldKind::Id => Self { exact: 0.99, close: 0.90, partial: 0.75 },
FieldKind::LicensePlate => Self { exact: 0.99, close: 0.75, partial: 0.50 },
FieldKind::Numeric
| FieldKind::GpsCoordinate => Self { exact: 0.95, close: 0.80, partial: 0.50 },
FieldKind::Categorical => Self { exact: 1.00, close: 0.95, partial: 0.70 },
FieldKind::FreeText => Self { exact: 0.90, close: 0.65, partial: 0.35 },
FieldKind::Alias => Self { exact: 0.90, close: 0.65, partial: 0.35 },
}
}
pub fn apply(&self, sim: f32) -> ComparisonLevel {
if sim >= self.exact { ComparisonLevel::Exact }
else if sim >= self.close { ComparisonLevel::Close }
else if sim >= self.partial { ComparisonLevel::Partial }
else { ComparisonLevel::None }
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn name_thresholds_produce_correct_levels() {
let t = LevelThresholds::for_kind(FieldKind::Name);
assert_eq!(t.apply(0.95), ComparisonLevel::Exact);
assert_eq!(t.apply(0.80), ComparisonLevel::Close);
assert_eq!(t.apply(0.60), ComparisonLevel::Partial);
assert_eq!(t.apply(0.30), ComparisonLevel::None);
}
#[test]
fn categorical_is_binary() {
let t = LevelThresholds::for_kind(FieldKind::Categorical);
assert_eq!(t.apply(1.00), ComparisonLevel::Exact);
assert_eq!(t.apply(0.99), ComparisonLevel::Close);
assert_eq!(t.apply(0.00), ComparisonLevel::None);
}
#[test]
fn date_thresholds_tight_bands() {
let t = LevelThresholds::for_kind(FieldKind::Date);
assert_eq!(t.apply(1.0), ComparisonLevel::Exact);
assert_eq!(t.apply(0.9), ComparisonLevel::Close);
assert_eq!(t.apply(0.75), ComparisonLevel::Partial);
assert_eq!(t.apply(0.3), ComparisonLevel::None);
}
#[test]
fn boundary_values_are_exclusive_on_lower_bound() {
let t = LevelThresholds::for_kind(FieldKind::Name);
assert_eq!(t.apply(t.exact), ComparisonLevel::Exact);
assert_eq!(t.apply(t.exact - 0.01), ComparisonLevel::Close);
}
}