cloakpipe_core/
resolver.rs1use crate::EntityCategory;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct ResolverConfig {
13 #[serde(default)]
15 pub enabled: bool,
16 #[serde(default = "default_threshold")]
18 pub threshold: f64,
19 #[serde(default = "default_min_prefix_len")]
21 pub min_prefix_len: usize,
22 #[serde(default)]
24 pub aliases: Vec<AliasGroup>,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct AliasGroup {
30 pub group: Vec<String>,
31}
32
/// Fallback for `ResolverConfig::threshold` when the field is absent from the
/// deserialized input; also used by `ResolverConfig::default`.
fn default_threshold() -> f64 {
    const DEFAULT_THRESHOLD: f64 = 0.90;
    DEFAULT_THRESHOLD
}
36
/// Fallback for `ResolverConfig::min_prefix_len` when the field is absent from
/// the deserialized input; also used by `ResolverConfig::default`.
fn default_min_prefix_len() -> usize {
    const DEFAULT_MIN_PREFIX_LEN: usize = 4;
    DEFAULT_MIN_PREFIX_LEN
}
40
41impl Default for ResolverConfig {
42 fn default() -> Self {
43 Self {
44 enabled: false,
45 threshold: default_threshold(),
46 min_prefix_len: default_min_prefix_len(),
47 aliases: Vec::new(),
48 }
49 }
50}
51
52pub struct EntityResolver {
54 config: ResolverConfig,
55 alias_map: HashMap<String, String>,
57}
58
59impl EntityResolver {
60 pub fn new(config: ResolverConfig) -> Self {
61 let mut alias_map = HashMap::new();
62 for group in &config.aliases {
63 if group.group.len() < 2 {
64 continue;
65 }
66 let canonical = group.group[0].clone();
67 for variant in &group.group {
68 alias_map.insert(variant.to_lowercase().trim().to_string(), canonical.clone());
69 }
70 }
71 Self { config, alias_map }
72 }
73
74 pub fn resolve(
82 &self,
83 original: &str,
84 category: &EntityCategory,
85 existing_entries: &HashMap<String, EntityCategory>,
86 ) -> Option<String> {
87 if !self.config.enabled {
88 return None;
89 }
90
91 let normalized = original.to_lowercase().trim().to_string();
93 if let Some(canonical) = self.alias_map.get(&normalized) {
94 return Some(canonical.clone());
95 }
96
97 let mut best_match: Option<String> = None;
99 let mut best_score: f64 = 0.0;
100
101 for (existing_original, existing_category) in existing_entries {
102 if existing_category != category {
104 continue;
105 }
106
107 if existing_original == original {
109 continue;
110 }
111
112 let score = self.similarity(original, existing_original);
113 if score > best_score && score >= self.config.threshold {
114 best_score = score;
115 best_match = Some(existing_original.clone());
116 }
117 }
118
119 best_match
120 }
121
122 fn similarity(&self, a: &str, b: &str) -> f64 {
128 let a_lower = a.to_lowercase();
129 let b_lower = b.to_lowercase();
130
131 let mut score = strsim::jaro_winkler(&a_lower, &b_lower);
133
134 let shorter = a_lower.len().min(b_lower.len());
136 if shorter >= self.config.min_prefix_len
137 && (a_lower.starts_with(&b_lower) || b_lower.starts_with(&a_lower))
138 {
139 score += 0.08;
140 }
141
142 score.min(1.0)
144 }
145
146 pub fn is_enabled(&self) -> bool {
148 self.config.enabled
149 }
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// Configuration with resolution switched on and no alias groups.
    fn enabled_config() -> ResolverConfig {
        ResolverConfig {
            enabled: true,
            threshold: 0.90,
            min_prefix_len: 4,
            aliases: Vec::new(),
        }
    }

    /// Same as `enabled_config`, plus one alias group whose canonical form is
    /// "Rishikesh Kumar".
    fn config_with_aliases() -> ResolverConfig {
        let group: Vec<String> = ["Rishikesh Kumar", "Rishi", "Rishi kesh"]
            .iter()
            .map(|name| name.to_string())
            .collect();
        ResolverConfig {
            aliases: vec![AliasGroup { group }],
            ..enabled_config()
        }
    }

    /// Builds an "already seen" map holding exactly one entry.
    fn single_entry(name: &str, category: EntityCategory) -> HashMap<String, EntityCategory> {
        let mut entries = HashMap::new();
        entries.insert(name.to_string(), category);
        entries
    }

    #[test]
    fn test_alias_resolution() {
        let resolver = EntityResolver::new(config_with_aliases());
        let existing = HashMap::new();

        // The last spelling differs from the configured alias only by case.
        for spelling in ["Rishi", "Rishi kesh", "rishi"].iter() {
            assert_eq!(
                resolver.resolve(spelling, &EntityCategory::Person, &existing),
                Some("Rishikesh Kumar".to_string()),
                "alias spelling {:?} should map to the canonical form",
                spelling
            );
        }
    }

    #[test]
    fn test_alias_canonical_self_resolves() {
        let resolver = EntityResolver::new(config_with_aliases());
        let existing = HashMap::new();

        let resolved = resolver.resolve("Rishikesh Kumar", &EntityCategory::Person, &existing);
        assert_eq!(resolved, Some("Rishikesh Kumar".to_string()));
    }

    #[test]
    fn test_fuzzy_match_misspelling() {
        let resolver = EntityResolver::new(enabled_config());
        let existing = single_entry("Rishikesh", EntityCategory::Person);

        let resolved = resolver.resolve("Rishiksh", &EntityCategory::Person, &existing);
        assert_eq!(resolved, Some("Rishikesh".to_string()));
    }

    #[test]
    fn test_fuzzy_match_prefix() {
        let resolver = EntityResolver::new(enabled_config());
        let existing = single_entry("Rishikesh", EntityCategory::Person);

        let resolved = resolver.resolve("Rishi", &EntityCategory::Person, &existing);
        assert_eq!(resolved, Some("Rishikesh".to_string()));
    }

    #[test]
    fn test_no_cross_category_match() {
        let resolver = EntityResolver::new(enabled_config());
        let existing = single_entry("Rishikesh", EntityCategory::Location);

        let resolved = resolver.resolve("Rishiksh", &EntityCategory::Person, &existing);
        assert_eq!(resolved, None);
    }

    #[test]
    fn test_no_match_different_entities() {
        let resolver = EntityResolver::new(enabled_config());
        let existing = single_entry("John", EntityCategory::Person);

        let resolved = resolver.resolve("Alice", &EntityCategory::Person, &existing);
        assert_eq!(resolved, None);
    }

    #[test]
    fn test_disabled_resolver() {
        let resolver = EntityResolver::new(ResolverConfig::default());
        let existing = single_entry("Rishikesh", EntityCategory::Person);

        let resolved = resolver.resolve("Rishiksh", &EntityCategory::Person, &existing);
        assert_eq!(resolved, None);
    }

    #[test]
    fn test_short_prefix_rejected() {
        let resolver = EntityResolver::new(enabled_config());
        let existing = single_entry("Alice", EntityCategory::Person);

        let resolved = resolver.resolve("Al", &EntityCategory::Person, &existing);
        assert_eq!(resolved, None);
    }

    #[test]
    fn test_similarity_scores() {
        let resolver = EntityResolver::new(enabled_config());

        let misspelling = resolver.similarity("Rishikesh", "Rishiksh");
        assert!(misspelling > 0.90);

        let prefix = resolver.similarity("Rishi", "Rishikesh");
        assert!(prefix > 0.90);

        let unrelated = resolver.similarity("John", "Mumbai");
        assert!(unrelated < 0.60);

        let john_joan = resolver.similarity("John", "Joan");
        assert!(john_joan < 0.92);
    }
}