zer_compare/similarity/
date.rs1use zer_core::{record::FieldValue, schema::FieldKind};
2
3use crate::similarity::SimilarityFn;
4
/// Similarity measure for date-valued fields.
///
/// The type carries no state; all of the scoring logic lives in the
/// `SimilarityFn` implementation further down in this file.
#[derive(Debug, Clone, Copy, Default)]
pub struct DateSimilarity;
16
/// Parses a date string into a `(year, month, day)` triple.
///
/// Two input shapes are recognised, tried in this order:
///
/// 1. A calendar date written `YYYY-MM-DD`, optionally followed by a time
///    component introduced by `T` or a space. The time component is ignored,
///    as are any further `-`-separated fields after the day.
/// 2. A bare integer, interpreted as seconds since 1970-01-01T00:00:00
///    (negative values lie before that instant). It is converted to the
///    proleptic Gregorian calendar date that contains that second.
///
/// Surrounding whitespace is trimmed first.
///
/// Returns `None` when neither shape matches, when the month is outside
/// `1..=12` or the day outside `1..=31`, or when a timestamp resolves to a
/// year that does not fit in `i32`. The day is only range-checked; it is not
/// validated against the length of the month.
fn parse_date(s: &str) -> Option<(i32, u32, u32)> {
    const SECS_PER_DAY: i64 = 86_400;

    /// Converts a day count relative to 1970-01-01 into `(year, month, day)`.
    ///
    /// Works on 400-year "eras" of exactly 146 097 days, with the year shifted
    /// to start on 1 March so the leap day is the last day of the shifted year.
    fn civil_from_days(days: i64) -> (i64, i64, i64) {
        let shifted = days + 719_468; // days since 0000-03-01
        let era = shifted.div_euclid(146_097);
        let day_of_era = shifted.rem_euclid(146_097); // 0..=146_096
        let year_of_era =
            (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
        let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
        let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February
        let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
        let month = if shifted_month < 10 { shifted_month + 3 } else { shifted_month - 9 };
        // January and February belong to the following civil year.
        let year = year_of_era + era * 400 + if month <= 2 { 1 } else { 0 };
        (year, month, day)
    }

    let s = s.trim();

    // Calendar-date form: keep only what precedes the time separator.
    let date_part = s
        .split(|c: char| c == 'T' || c == ' ')
        .next()
        .unwrap_or(s);
    let mut fields = date_part.split('-');
    if let (Some(y), Some(m), Some(d)) = (fields.next(), fields.next(), fields.next()) {
        if let (Ok(y), Ok(m), Ok(d)) = (y.parse::<i32>(), m.parse::<u32>(), d.parse::<u32>()) {
            if (1..=12).contains(&m) && (1..=31).contains(&d) {
                return Some((y, m, d));
            }
        }
    }

    // Timestamp form: the whole trimmed string must be an integer.
    let ts = s.parse::<i64>().ok()?;
    // Floor division so that instants before the epoch land on the correct
    // (earlier) day instead of being rounded toward 1970-01-01.
    let (year, month, day) = civil_from_days(ts.div_euclid(SECS_PER_DAY));
    if year < i64::from(i32::MIN) || year > i64::from(i32::MAX) {
        return None;
    }
    // Lossless: the year was range-checked above, month is 1..=12, day 1..=31.
    Some((year as i32, month as u32, day as u32))
}
43
/// Converts a proleptic Gregorian calendar date to a day number.
///
/// The value increases by exactly one per calendar day, so subtracting two
/// results gives the number of days between the dates. For reference,
/// 2000-01-01 maps to 2 451 545.
///
/// `m` is expected to be in `1..=12`; `d` is added as-is and is not checked
/// against the length of the month.
///
/// All arithmetic is done in `i64` with floor division, so the function
/// neither overflows nor loses day continuity for any `i32` year. Results
/// that fall outside the `i32` range saturate at `i32::MIN` / `i32::MAX`.
fn to_julian(y: i32, m: u32, d: u32) -> i32 {
    let (year, month, day) = (i64::from(y), i64::from(m), i64::from(d));

    // 1 for January/February, 0 otherwise: those two months are counted as
    // months 10 and 11 of the previous year so the leap day comes last.
    let a = (14 - month) / 12;
    let y2 = year + 4800 - a;
    let m2 = month + 12 * a - 3;

    // Days contributed by the whole months since 1 March; never negative for
    // a month in 1..=12.
    let month_days = (153 * m2 + 2) / 5;
    // Leap-day count must round toward negative infinity to stay correct
    // when the shifted year is negative.
    let leap_days = y2.div_euclid(4) - y2.div_euclid(100) + y2.div_euclid(400);

    let day_number = day + month_days + 365 * y2 + leap_days - 32_045;
    day_number.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32
}
51
52fn days_between(a: (i32, u32, u32), b: (i32, u32, u32)) -> i32 {
53 (to_julian(a.0, a.1, a.2) - to_julian(b.0, b.1, b.2)).abs()
54}
55
56fn date_score(sa: &str, sb: &str) -> f32 {
57 if sa == sb { return 1.0; }
58 let (da, db) = match (parse_date(sa), parse_date(sb)) {
59 (Some(a), Some(b)) => (a, b),
60 _ => return 0.0,
61 };
62 let diff = days_between(da, db);
63 if diff == 0 { 1.0 }
64 else if diff <= 1 { 0.9 }
65 else if da.0 == db.0 && da.1 == db.1 { 0.75 }
66 else if da.0 == db.0 { 0.5 }
67 else if (da.0 - db.0).abs() <= 1 { 0.3 }
68 else { 0.0 }
69}
70
71impl SimilarityFn for DateSimilarity {
72 fn similarity(&self, a: &FieldValue, b: &FieldValue) -> f32 {
73 let (sa, sb) = match (a, b) {
74 (FieldValue::Text(a), FieldValue::Text(b)) => (a.as_str(), b.as_str()),
75 _ => return 0.0,
76 };
77 date_score(sa, sb)
78 }
79 fn similarity_str(&self, a: &str, b: &str) -> f32 { date_score(a, b) }
80 fn field_kind(&self) -> FieldKind { FieldKind::Date }
81}
82
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps a string slice in a text field value.
    fn tv(s: &str) -> FieldValue {
        FieldValue::Text(s.into())
    }

    /// Scores two textual dates through the `FieldValue` entry point.
    fn score(left: &str, right: &str) -> f32 {
        DateSimilarity.similarity(&tv(left), &tv(right))
    }

    #[test]
    fn exact_date_match() {
        assert_eq!(score("1990-06-15", "1990-06-15"), 1.0);
    }

    #[test]
    fn off_by_one_day() {
        assert_eq!(score("1990-06-15", "1990-06-16"), 0.9);
        assert_eq!(score("1990-06-15", "1990-06-14"), 0.9);
    }

    #[test]
    fn same_year_month_different_day() {
        assert_eq!(score("1990-06-15", "1990-06-20"), 0.75);
    }

    #[test]
    fn same_year_different_month() {
        assert_eq!(score("1990-06-15", "1990-09-01"), 0.5);
    }

    #[test]
    fn age_compatible_within_one_year() {
        assert_eq!(score("1990-06-15", "1991-01-01"), 0.3);
        assert_eq!(score("1990-01-01", "1989-07-20"), 0.3);
    }

    #[test]
    fn completely_different_dates() {
        assert_eq!(score("1990-06-15", "1975-03-22"), 0.0);
    }

    #[test]
    fn missing_field_returns_zero() {
        let present = tv("1990-06-15");
        assert_eq!(DateSimilarity.similarity(&FieldValue::Null, &present), 0.0);
        assert_eq!(DateSimilarity.similarity(&present, &FieldValue::Null), 0.0);
    }

    #[test]
    fn timestamp_date_part_comparison() {
        // Only the date portion before `T` takes part in the comparison.
        assert_eq!(score("1990-06-15T08:30:00", "1990-06-15T14:00:00"), 1.0);
    }
}