1use std::fmt;
6use std::sync::OnceLock;
7use strsim::{jaro_winkler, normalized_levenshtein};
8
/// Process-wide cache of the known source identifiers.
///
/// Filled at most once, on first use, by `ensure_sources_loaded`.
static VALID_SOURCES_CACHE: OnceLock<Vec<String>> = OnceLock::new();
11
12fn ensure_sources_loaded() {
14 VALID_SOURCES_CACHE.get_or_init(|| {
15 vec![
17 "planet".to_string(),
19 "africa".to_string(),
21 "antarctica".to_string(),
22 "asia".to_string(),
23 "australia-oceania".to_string(),
24 "europe".to_string(),
25 "north-america".to_string(),
26 "south-america".to_string(),
27 "central-america".to_string(),
28 "europe/albania".to_string(),
30 "europe/andorra".to_string(),
31 "europe/austria".to_string(),
32 "europe/belarus".to_string(),
33 "europe/belgium".to_string(),
34 "europe/bosnia-herzegovina".to_string(),
35 "europe/bulgaria".to_string(),
36 "europe/croatia".to_string(),
37 "europe/cyprus".to_string(),
38 "europe/czech-republic".to_string(),
39 "europe/denmark".to_string(),
40 "europe/estonia".to_string(),
41 "europe/faroe-islands".to_string(),
42 "europe/finland".to_string(),
43 "europe/france".to_string(),
44 "europe/germany".to_string(),
45 "europe/great-britain".to_string(),
46 "europe/greece".to_string(),
47 "europe/hungary".to_string(),
48 "europe/iceland".to_string(),
49 "europe/ireland".to_string(),
50 "europe/isle-of-man".to_string(),
51 "europe/italy".to_string(),
52 "europe/kosovo".to_string(),
53 "europe/latvia".to_string(),
54 "europe/liechtenstein".to_string(),
55 "europe/lithuania".to_string(),
56 "europe/luxembourg".to_string(),
57 "europe/malta".to_string(),
58 "europe/moldova".to_string(),
59 "europe/monaco".to_string(),
60 "europe/montenegro".to_string(),
61 "europe/netherlands".to_string(),
62 "europe/north-macedonia".to_string(),
63 "europe/norway".to_string(),
64 "europe/poland".to_string(),
65 "europe/portugal".to_string(),
66 "europe/romania".to_string(),
67 "europe/russia".to_string(),
68 "europe/san-marino".to_string(),
69 "europe/serbia".to_string(),
70 "europe/slovakia".to_string(),
71 "europe/slovenia".to_string(),
72 "europe/spain".to_string(),
73 "europe/sweden".to_string(),
74 "europe/switzerland".to_string(),
75 "europe/turkey".to_string(),
76 "europe/ukraine".to_string(),
77 "europe/united-kingdom".to_string(),
78 "europe/vatican-city".to_string(),
79 "north-america/canada".to_string(),
81 "north-america/greenland".to_string(),
82 "north-america/mexico".to_string(),
83 "north-america/us".to_string(),
84 "asia/afghanistan".to_string(),
86 "asia/bangladesh".to_string(),
87 "asia/bhutan".to_string(),
88 "asia/cambodia".to_string(),
89 "asia/china".to_string(),
90 "asia/gcc-states".to_string(),
91 "asia/india".to_string(),
92 "asia/indonesia".to_string(),
93 "asia/iran".to_string(),
94 "asia/iraq".to_string(),
95 "asia/israel-and-palestine".to_string(),
96 "asia/japan".to_string(),
97 "asia/jordan".to_string(),
98 "asia/kazakhstan".to_string(),
99 "asia/kyrgyzstan".to_string(),
100 "asia/lebanon".to_string(),
101 "asia/malaysia-singapore-brunei".to_string(),
102 "asia/maldives".to_string(),
103 "asia/mongolia".to_string(),
104 "asia/myanmar".to_string(),
105 "asia/nepal".to_string(),
106 "asia/north-korea".to_string(),
107 "asia/pakistan".to_string(),
108 "asia/philippines".to_string(),
109 "asia/south-korea".to_string(),
110 "asia/sri-lanka".to_string(),
111 "asia/syria".to_string(),
112 "asia/taiwan".to_string(),
113 "asia/tajikistan".to_string(),
114 "asia/thailand".to_string(),
115 "asia/tibet".to_string(),
116 "asia/turkmenistan".to_string(),
117 "asia/uzbekistan".to_string(),
118 "asia/vietnam".to_string(),
119 "asia/yemen".to_string(),
120 ]
121 });
122}
123
124fn get_valid_sources_sync() -> &'static [String] {
129 ensure_sources_loaded();
131
132 VALID_SOURCES_CACHE
134 .get()
135 .map(|v| v.as_slice())
136 .unwrap_or(&[])
137}
138
139fn find_best_fuzzy_match(input: &str, candidates: &[String]) -> Option<String> {
150 if candidates.is_empty() {
151 return None;
152 }
153
154 let input_lower = input.to_lowercase();
155 let mut best_match = None;
156 let mut best_score = 0.0f64;
157
158 let min_threshold = 0.65;
160
161 for candidate in candidates {
162 let candidate_lower = candidate.to_lowercase();
163
164 let jw_score = jaro_winkler(&input_lower, &candidate_lower);
166
167 let lev_score = normalized_levenshtein(&input_lower, &candidate_lower);
169
170 let combined_score = (jw_score * 0.7) + (lev_score * 0.3);
172
173 let mut semantic_bonus = 0.0;
175
176 let prefix_len = input_lower.chars().count().min(7); if prefix_len >= 4 {
179 let input_prefix = input_lower.chars().take(prefix_len).collect::<String>();
180 let candidate_prefix = candidate_lower.chars().take(prefix_len).collect::<String>();
181
182 let prefix_similarity = normalized_levenshtein(&input_prefix, &candidate_prefix);
184 if prefix_similarity > 0.7 {
185 semantic_bonus += 0.2 * prefix_similarity;
186 }
187 }
188
189 if input_lower.len() >= 8 && candidate_lower.len() >= 8 {
191 let length_ratio = 1.0
192 - ((input_lower.len() as f64 - candidate_lower.len() as f64).abs()
193 / input_lower.len().max(candidate_lower.len()) as f64);
194 if length_ratio > 0.7 {
195 semantic_bonus += 0.1 * length_ratio;
196 }
197 }
198
199 if candidate_lower.contains('-') || candidate_lower.contains('/') {
201 let parts: Vec<&str> = candidate_lower.split(&['-', '/'][..]).collect();
202 for part in parts {
203 if part.len() >= 4 {
204 let part_similarity = jaro_winkler(&input_lower, part);
205 if part_similarity > 0.85 {
206 semantic_bonus += 0.12 * part_similarity; }
209 }
210 }
211 }
212
213 if input_lower.len() >= 8 && candidate_lower.len() <= 7 && !candidate_lower.contains('/') {
215 semantic_bonus -= 0.1;
216 }
217
218 let final_score = combined_score + semantic_bonus;
219
220 if final_score >= min_threshold && final_score > best_score {
221 best_score = final_score;
222 best_match = Some(candidate.clone());
223 }
224 }
225
226 best_match
227}
228
229pub fn suggest_correction(source: &str) -> Option<String> {
231 let valid_sources = get_valid_sources_sync();
233
234 for valid_source in valid_sources {
236 if valid_source.eq_ignore_ascii_case(source) {
237 return None; }
239 }
240
241 if !source.contains('/') {
243 for valid_source in valid_sources {
245 if let Some(slash_pos) = valid_source.find('/') {
246 let country_part = &valid_source[slash_pos + 1..];
247 if country_part.eq_ignore_ascii_case(source) {
248 return Some(valid_source.clone());
249 }
250 }
251 }
252
253 let mut continent_level: Vec<String> = Vec::new();
255 let mut country_level: Vec<String> = Vec::new();
256
257 for valid_source in valid_sources {
258 if valid_source.contains('/') {
259 country_level.push(valid_source.clone());
260 } else {
261 continent_level.push(valid_source.clone());
262 }
263 }
264
265 if source.len() >= 6 {
267 if let Some(match_result) = find_best_fuzzy_match(source, &continent_level) {
268 return Some(match_result);
269 }
270 }
271
272 if source.len() <= 6 {
274 if let Some(match_result) = find_best_fuzzy_match(source, &continent_level) {
275 let source_lower = source.to_lowercase();
277 let match_result_lower = match_result.to_lowercase();
278 let similarity = jaro_winkler(&source_lower, &match_result_lower);
279 if similarity > 0.8 {
280 return Some(match_result);
281 }
282 }
283 }
284
285 let country_names: Vec<String> = country_level
287 .iter()
288 .filter_map(|s| s.split('/').nth(1).map(|c| c.to_string()))
289 .collect();
290
291 if let Some(best_country) = find_best_fuzzy_match(source, &country_names) {
292 for full_path in &country_level {
294 if let Some(country_part) = full_path.split('/').nth(1) {
295 if country_part == best_country {
296 return Some(full_path.clone());
297 }
298 }
299 }
300 }
301
302 return find_best_fuzzy_match(source, valid_sources);
304 }
305
306 if let Some(slash_pos) = source.find('/') {
308 let continent = &source[..slash_pos];
309 let country = &source[slash_pos + 1..];
310
311 for valid_source in valid_sources {
313 if let Some(valid_slash_pos) = valid_source.find('/') {
314 let valid_country = &valid_source[valid_slash_pos + 1..];
315 if valid_country.eq_ignore_ascii_case(country) {
316 return Some(valid_source.clone());
318 }
319 }
320 }
321
322 let continents: Vec<String> = valid_sources
324 .iter()
325 .filter(|s| !s.contains('/'))
326 .cloned()
327 .collect();
328
329 if let Some(corrected_continent) = find_best_fuzzy_match(continent, &continents) {
330 if country.len() > 8
332 && !country
333 .chars()
334 .all(|c| c.is_ascii_alphanumeric() || c == '-')
335 {
336 return Some(corrected_continent);
337 }
338 return Some(corrected_continent);
340 }
341 }
342
343 find_best_fuzzy_match(source, valid_sources)
345}
346
/// Errors reported by this crate.
#[derive(Debug)]
pub enum Error {
    /// The named source was not found or is not supported; the payload is the
    /// source name that is echoed in the error message.
    SourceNotFound(String),

    /// A download failed; the payload is a description of the failure.
    DownloadFailed(String),

    /// An HTTP-level failure. With the `http` feature enabled, `reqwest`
    /// errors that are neither connection failures nor timeouts are converted
    /// into this variant.
    HttpError(String),

    /// An underlying I/O error, also exposed through
    /// `std::error::Error::source`.
    IoError(std::io::Error),

    /// The input was rejected as invalid; the payload is a description.
    InvalidInput(String),

    /// A network-level failure. With the `http` feature enabled, `reqwest`
    /// connection failures and timeouts are converted into this variant.
    NetworkError(String),
}
368
369impl fmt::Display for Error {
370 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
371 match self {
372 Error::SourceNotFound(source) => {
373 write!(f, "Source '{source}' not found or not supported")
374 }
375 Error::DownloadFailed(msg) => {
376 write!(f, "Download failed: {msg}")
377 }
378 Error::HttpError(msg) => {
379 write!(f, "HTTP error: {msg}")
380 }
381 Error::IoError(err) => {
382 write!(f, "I/O error: {err}")
383 }
384 Error::InvalidInput(msg) => {
385 write!(f, "Invalid input: {msg}")
386 }
387 Error::NetworkError(msg) => {
388 write!(f, "Network error: {msg}")
389 }
390 }
391 }
392}
393
394impl std::error::Error for Error {
395 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
396 match self {
397 Error::IoError(err) => Some(err),
398 _ => None,
399 }
400 }
401}
402
403impl From<std::io::Error> for Error {
404 fn from(err: std::io::Error) -> Self {
405 Error::IoError(err)
406 }
407}
408
#[cfg(feature = "http")]
impl From<reqwest::Error> for Error {
    /// Classifies a `reqwest` error: connection failures and timeouts become
    /// `NetworkError`, everything else becomes `HttpError`. The original
    /// error is kept only as its display text.
    fn from(err: reqwest::Error) -> Self {
        let is_network_failure = err.is_connect() || err.is_timeout();
        let message = err.to_string();

        if is_network_failure {
            Self::NetworkError(message)
        } else {
            Self::HttpError(message)
        }
    }
}
419
/// Shorthand for `std::result::Result` with this module's `Error` as the
/// error type.
pub type Result<T> = std::result::Result<T, Error>;
422
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts `suggest_correction(input) == expected` for every listed pair.
    fn check_suggestions(cases: &[(&str, Option<&str>)]) {
        for &(input, expected) in cases {
            assert_eq!(
                suggest_correction(input),
                expected.map(String::from),
                "unexpected suggestion for {input:?}"
            );
        }
    }

    /// Builds an owned candidate list from string literals.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn test_suggest_correction_fuzzy_matching() {
        check_suggestions(&[
            // Misspelled top-level names.
            ("antartica", Some("antarctica")),
            ("austrailia", Some("australia-oceania")),
            ("eurpoe", Some("europe")),
            ("afirca", Some("africa")),
            // Short inputs.
            ("plant", Some("planet")),
            ("plnet", Some("planet")),
        ]);
    }

    #[test]
    fn test_suggest_correction_standalone_country_names() {
        check_suggestions(&[
            ("monaco", Some("europe/monaco")),
            ("belgium", Some("europe/belgium")),
            ("germany", Some("europe/germany")),
            ("france", Some("europe/france")),
            // Case must not matter.
            ("MONACO", Some("europe/monaco")),
            ("Belgium", Some("europe/belgium")),
        ]);
    }

    #[test]
    fn test_suggest_correction_standalone_country_typos() {
        check_suggestions(&[
            ("monac", Some("europe/monaco")),
            ("belgum", Some("europe/belgium")),
            ("germay", Some("europe/germany")),
        ]);
    }

    #[test]
    fn test_suggest_correction_country_paths() {
        check_suggestions(&[
            // Known country under a wrong or misspelled continent.
            ("antartica/belgium", Some("europe/belgium")),
            ("europ/france", Some("europe/france")),
            ("eurpoe/germany", Some("europe/germany")),
            // Unknown country: only the continent is suggested.
            ("europ/unknown-country", Some("europe")),
        ]);
    }

    #[test]
    fn test_suggest_correction_no_match() {
        check_suggestions(&[
            ("totally-invalid-place", None),
            ("europe", None),
            ("a", None),
        ]);
    }

    #[test]
    fn test_suggest_correction_case_insensitive() {
        check_suggestions(&[
            ("ANTARTICA", Some("antarctica")),
            ("AntArTiCa", Some("antarctica")),
            ("EuRoPe", None),
        ]);
    }

    #[test]
    fn test_strsim_fuzzy_matching() {
        let candidates = owned(&[
            "australia-oceania",
            "austria",
            "europe/austria",
            "antarctica",
        ]);

        assert_eq!(
            find_best_fuzzy_match("austrailia", &candidates),
            Some("australia-oceania".to_string())
        );
    }

    #[test]
    fn test_semantic_bonuses() {
        // A long input must not collapse onto a short, slash-free candidate.
        let candidates = owned(&["austria", "europe/austria", "australia-oceania"]);
        assert_ne!(
            find_best_fuzzy_match("very-long-input-string", &candidates),
            Some("austria".to_string())
        );

        // Candidates of similar length are preferred.
        let length_candidates = owned(&[
            "short",
            "medium-length-string",
            "very-long-similar-length",
        ]);
        assert_eq!(
            find_best_fuzzy_match("very-long-similar-input", &length_candidates),
            Some("very-long-similar-length".to_string())
        );

        // A shared prefix is enough for a truncated input.
        let prefix_candidates = owned(&["australia-oceania", "antarctica", "africa"]);
        assert_eq!(
            find_best_fuzzy_match("austr", &prefix_candidates),
            Some("australia-oceania".to_string())
        );
    }
}