zer_compare/similarity/
numeric.rs1use zer_core::{record::FieldValue, schema::FieldKind};
2
3use crate::similarity::SimilarityFn;
4
/// Similarity function for numeric fields.
///
/// Values are compared by relative difference and mapped onto a small set of
/// fixed scores (`1.0`, `0.85`, `0.6`, `0.3`, `0.0`) rather than a continuous
/// scale. The type carries no state, so it is cheap to copy and can be built
/// with `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct NumericBucketedSimilarity;
18
19fn extract_numeric(v: &FieldValue) -> Option<f64> {
20 match v {
21 FieldValue::Float(f) => Some(*f),
22 FieldValue::Int(i) => Some(*i as f64),
23 FieldValue::Text(s) => s.trim().parse::<f64>().ok(),
24 _ => None,
25 }
26}
27
/// Scores how close two numbers are, using fixed relative-difference buckets.
///
/// The relative difference is `|va - vb| / max(|va|, |vb|, 1.0)`. The floor of
/// `1.0` on the denominator keeps pairs of values near zero from producing
/// very large ratios.
///
/// Returns:
/// * `1.0`  - the values are equal (including two infinities of the same sign),
/// * `0.85` - relative difference of at most 5%,
/// * `0.6`  - relative difference of at most 20%,
/// * `0.3`  - relative difference of at most 50%,
/// * `0.0`  - anything further apart, or when either input is NaN.
fn numeric_score(va: f64, vb: f64) -> f32 {
    // Compare the values directly instead of testing `diff == 0.0`: for two
    // equal infinities `va - vb` is NaN, which would otherwise fall through
    // every bucket and score identical inputs as 0.0.
    if va == vb {
        return 1.0;
    }

    let diff = (va - vb).abs();
    let denom = va.abs().max(vb.abs()).max(1.0);
    let rel_diff = diff / denom;

    // A NaN ratio compares false against every threshold and lands on 0.0.
    if rel_diff <= 0.05 {
        0.85
    } else if rel_diff <= 0.20 {
        0.6
    } else if rel_diff <= 0.50 {
        0.3
    } else {
        0.0
    }
}
38
39impl SimilarityFn for NumericBucketedSimilarity {
40 fn similarity(&self, a: &FieldValue, b: &FieldValue) -> f32 {
41 match (extract_numeric(a), extract_numeric(b)) {
42 (Some(va), Some(vb)) => numeric_score(va, vb),
43 _ => 0.0,
44 }
45 }
46 fn similarity_str(&self, a: &str, b: &str) -> f32 {
47 match (a.trim().parse::<f64>(), b.trim().parse::<f64>()) {
48 (Ok(va), Ok(vb)) => numeric_score(va, vb),
49 _ => 0.0,
50 }
51 }
52 fn field_kind(&self) -> FieldKind { FieldKind::Numeric }
53}
54
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an integer field value.
    fn ti(n: i64) -> FieldValue {
        FieldValue::Int(n)
    }

    /// Builds a float field value.
    fn tf(f: f64) -> FieldValue {
        FieldValue::Float(f)
    }

    /// Builds a text field value.
    fn tv(s: &str) -> FieldValue {
        FieldValue::Text(s.into())
    }

    /// Scores a pair of values with the similarity under test.
    fn score(a: FieldValue, b: FieldValue) -> f32 {
        NumericBucketedSimilarity.similarity(&a, &b)
    }

    #[test]
    fn exact_int_match() {
        assert_eq!(score(ti(180), ti(180)), 1.0);
    }

    #[test]
    fn close_within_5_percent() {
        // 3 / 183 is about 1.6%.
        assert_eq!(score(ti(180), ti(183)), 0.85);
    }

    #[test]
    fn medium_within_20_percent() {
        // 15 / 115 is about 13%.
        assert_eq!(score(ti(100), ti(115)), 0.6);
    }

    #[test]
    fn large_within_50_percent() {
        // 40 / 140 is about 29%.
        assert_eq!(score(ti(100), ti(140)), 0.3);
    }

    #[test]
    fn very_different() {
        // 200 / 300 is about 67%, beyond the last bucket.
        assert_eq!(score(ti(100), ti(300)), 0.0);
    }

    #[test]
    fn float_parsing_from_text() {
        // Both strings parse as floats and differ by far less than 5%.
        assert_eq!(score(tv("52.345"), tv("52.346")), 0.85);
    }

    #[test]
    fn mixed_int_float() {
        assert_eq!(score(ti(100), tf(100.0)), 1.0);
    }

    #[test]
    fn null_returns_zero() {
        assert_eq!(score(FieldValue::Null, ti(100)), 0.0);
    }
}