zer_compare/
discretize.rs1use zer_core::{comparison::ComparisonLevel, schema::FieldKind};
2
/// Similarity cut-offs that split a continuous score into discrete
/// comparison levels.
///
/// Each field is the *inclusive* lower bound of its band: a score is assigned
/// to the highest band whose threshold it meets or exceeds. The presets in
/// this file keep `exact >= close >= partial`; hand-built values are not
/// validated, so callers constructing the struct directly should preserve
/// that ordering.
///
/// The struct is three `f32`s, so it is `Copy` and can be passed by value;
/// `PartialEq` allows presets to be compared directly.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LevelThresholds {
    /// Minimum similarity that is classified as an exact match.
    pub exact: f32,
    /// Minimum similarity that is classified as a close match.
    pub close: f32,
    /// Minimum similarity that is classified as a partial match.
    pub partial: f32,
}
17
18impl LevelThresholds {
19 pub fn for_kind(kind: FieldKind) -> Self {
21 match kind {
22 FieldKind::Name => Self {
23 exact: 0.92,
24 close: 0.75,
25 partial: 0.50,
26 },
27 FieldKind::Date | FieldKind::Timestamp => Self {
28 exact: 0.99,
29 close: 0.85,
30 partial: 0.60,
31 },
32 FieldKind::Phone => Self {
33 exact: 0.98,
34 close: 0.90,
35 partial: 0.70,
36 },
37 FieldKind::Address => Self {
38 exact: 0.90,
39 close: 0.70,
40 partial: 0.40,
41 },
42 FieldKind::Id => Self {
43 exact: 0.99,
44 close: 0.90,
45 partial: 0.75,
46 },
47 FieldKind::LicensePlate => Self {
48 exact: 0.99,
49 close: 0.75,
50 partial: 0.50,
51 },
52 FieldKind::Numeric | FieldKind::GpsCoordinate => Self {
53 exact: 0.95,
54 close: 0.80,
55 partial: 0.50,
56 },
57 FieldKind::Categorical => Self {
58 exact: 1.00,
59 close: 0.95,
60 partial: 0.70,
61 },
62 FieldKind::FreeText => Self {
63 exact: 0.90,
64 close: 0.65,
65 partial: 0.35,
66 },
67 FieldKind::Alias => Self {
68 exact: 0.90,
69 close: 0.65,
70 partial: 0.35,
71 },
72 }
73 }
74
75 pub fn apply(&self, sim: f32) -> ComparisonLevel {
77 if sim >= self.exact {
78 ComparisonLevel::Exact
79 } else if sim >= self.close {
80 ComparisonLevel::Close
81 } else if sim >= self.partial {
82 ComparisonLevel::Partial
83 } else {
84 ComparisonLevel::None
85 }
86 }
87}
88
#[cfg(test)]
mod tests {
    use super::*;

    /// One representative score per band for the `Name` preset
    /// (0.92 / 0.75 / 0.50).
    #[test]
    fn name_thresholds_produce_correct_levels() {
        let t = LevelThresholds::for_kind(FieldKind::Name);
        assert_eq!(t.apply(0.95), ComparisonLevel::Exact);
        assert_eq!(t.apply(0.80), ComparisonLevel::Close);
        assert_eq!(t.apply(0.60), ComparisonLevel::Partial);
        assert_eq!(t.apply(0.30), ComparisonLevel::None);
    }

    /// Only a perfect score is `Exact` for categorical fields.
    ///
    /// NOTE(review): the test name suggests categorical similarity is only
    /// ever 0.0 or 1.0 — confirm against the comparator. The 0.99 case pins
    /// the band edge of the preset rather than a realistic input.
    #[test]
    fn categorical_is_binary() {
        let t = LevelThresholds::for_kind(FieldKind::Categorical);
        assert_eq!(t.apply(1.00), ComparisonLevel::Exact);
        assert_eq!(t.apply(0.99), ComparisonLevel::Close);
        assert_eq!(t.apply(0.00), ComparisonLevel::None);
    }

    /// One representative score per band for the `Date` preset
    /// (0.99 / 0.85 / 0.60).
    #[test]
    fn date_thresholds_tight_bands() {
        let t = LevelThresholds::for_kind(FieldKind::Date);
        assert_eq!(t.apply(1.0), ComparisonLevel::Exact);
        assert_eq!(t.apply(0.9), ComparisonLevel::Close);
        assert_eq!(t.apply(0.75), ComparisonLevel::Partial);
        assert_eq!(t.apply(0.3), ComparisonLevel::None);
    }

    /// A score sitting exactly on a threshold belongs to that threshold's
    /// band (`>=`), and a score just under it falls to the next band down.
    #[test]
    fn boundary_values_are_inclusive_on_lower_bound() {
        let t = LevelThresholds::for_kind(FieldKind::Name);

        // Exactly on each threshold: stays in that band.
        assert_eq!(t.apply(t.exact), ComparisonLevel::Exact);
        assert_eq!(t.apply(t.close), ComparisonLevel::Close);
        assert_eq!(t.apply(t.partial), ComparisonLevel::Partial);

        // Just below each threshold: drops one band.
        assert_eq!(t.apply(t.exact - 0.01), ComparisonLevel::Close);
        assert_eq!(t.apply(t.close - 0.01), ComparisonLevel::Partial);
        assert_eq!(t.apply(t.partial - 0.01), ComparisonLevel::None);
    }
}