zer_compare/
discretize.rs1use zer_core::{comparison::ComparisonLevel, schema::FieldKind};
2
/// Similarity cut-offs used to bucket a raw similarity score into a
/// [`ComparisonLevel`].
///
/// Each field is an *inclusive* lower bound: [`LevelThresholds::apply`]
/// compares with `>=`, checking `exact` first, then `close`, then `partial`.
/// Every preset returned by [`LevelThresholds::for_kind`] satisfies
/// `exact >= close >= partial`; hand-built values should keep that ordering,
/// otherwise a lower band can never be selected.
///
/// NOTE(review): scores are presumably normalised to `0.0..=1.0` — confirm
/// against the similarity functions that feed `apply`.
#[derive(Debug, Clone, PartialEq)]
pub struct LevelThresholds {
    /// Minimum score that is reported as `ComparisonLevel::Exact`.
    pub exact: f32,
    /// Minimum score that is reported as `ComparisonLevel::Close`.
    pub close: f32,
    /// Minimum score that is reported as `ComparisonLevel::Partial`.
    pub partial: f32,
}
17
18impl LevelThresholds {
19 pub fn for_kind(kind: FieldKind) -> Self {
21 match kind {
22 FieldKind::Name => Self { exact: 0.92, close: 0.75, partial: 0.50 },
23 FieldKind::Date
24 | FieldKind::Timestamp => Self { exact: 0.99, close: 0.85, partial: 0.60 },
25 FieldKind::Phone => Self { exact: 0.98, close: 0.90, partial: 0.70 },
26 FieldKind::Address => Self { exact: 0.90, close: 0.70, partial: 0.40 },
27 FieldKind::Id => Self { exact: 0.99, close: 0.90, partial: 0.75 },
28 FieldKind::LicensePlate => Self { exact: 0.99, close: 0.75, partial: 0.50 },
29 FieldKind::Numeric
30 | FieldKind::GpsCoordinate => Self { exact: 0.95, close: 0.80, partial: 0.50 },
31 FieldKind::Categorical => Self { exact: 1.00, close: 0.95, partial: 0.70 },
32 FieldKind::FreeText => Self { exact: 0.90, close: 0.65, partial: 0.35 },
33 FieldKind::Alias => Self { exact: 0.90, close: 0.65, partial: 0.35 },
34 }
35 }
36
37 pub fn apply(&self, sim: f32) -> ComparisonLevel {
39 if sim >= self.exact { ComparisonLevel::Exact }
40 else if sim >= self.close { ComparisonLevel::Close }
41 else if sim >= self.partial { ComparisonLevel::Partial }
42 else { ComparisonLevel::None }
43 }
44}
45
#[cfg(test)]
mod tests {
    use super::*;

    /// One representative score per band for the `Name` preset.
    #[test]
    fn name_thresholds_produce_correct_levels() {
        let t = LevelThresholds::for_kind(FieldKind::Name);
        assert_eq!(t.apply(0.95), ComparisonLevel::Exact);
        assert_eq!(t.apply(0.80), ComparisonLevel::Close);
        assert_eq!(t.apply(0.60), ComparisonLevel::Partial);
        assert_eq!(t.apply(0.30), ComparisonLevel::None);
    }

    /// `Categorical` only reports `Exact` for a perfect score, but it still
    /// has `Close` and `Partial` bands below that — it is not a binary
    /// match/no-match field.
    #[test]
    fn categorical_exact_requires_perfect_score() {
        let t = LevelThresholds::for_kind(FieldKind::Categorical);
        assert_eq!(t.apply(1.00), ComparisonLevel::Exact);
        assert_eq!(t.apply(0.99), ComparisonLevel::Close);
        assert_eq!(t.apply(0.80), ComparisonLevel::Partial);
        assert_eq!(t.apply(0.00), ComparisonLevel::None);
    }

    /// One representative score per band for the `Date` preset.
    #[test]
    fn date_thresholds_tight_bands() {
        let t = LevelThresholds::for_kind(FieldKind::Date);
        assert_eq!(t.apply(1.0), ComparisonLevel::Exact);
        assert_eq!(t.apply(0.9), ComparisonLevel::Close);
        assert_eq!(t.apply(0.75), ComparisonLevel::Partial);
        assert_eq!(t.apply(0.3), ComparisonLevel::None);
    }

    /// Each cut-off is an inclusive lower bound: a score equal to the
    /// threshold lands in that band, a score just below it drops one band.
    #[test]
    fn thresholds_are_inclusive_lower_bounds() {
        let t = LevelThresholds::for_kind(FieldKind::Name);

        assert_eq!(t.apply(t.exact), ComparisonLevel::Exact);
        assert_eq!(t.apply(t.exact - 0.01), ComparisonLevel::Close);

        assert_eq!(t.apply(t.close), ComparisonLevel::Close);
        assert_eq!(t.apply(t.close - 0.01), ComparisonLevel::Partial);

        assert_eq!(t.apply(t.partial), ComparisonLevel::Partial);
        assert_eq!(t.apply(t.partial - 0.01), ComparisonLevel::None);
    }

    /// NaN fails every `>=` comparison, so it falls through to `None`.
    #[test]
    fn nan_similarity_maps_to_none() {
        let t = LevelThresholds::for_kind(FieldKind::Name);
        assert_eq!(t.apply(f32::NAN), ComparisonLevel::None);
    }
}