1use crate::error::StrSimError;
2
3use super::{ExpectTokenizerType, StrSim, TokenizerType};
4
5use anyhow::Result;
6use derive_more::Display;
7
8#[derive(Display)]
9#[display(fmt = "Jaro")]
10pub struct Jaro;
11
12impl Jaro {
13 pub fn similarity(s1: &[char], s2: &[char]) -> f64 {
14 let max_len = s1.len().max(s2.len());
15
16 if max_len == 0 {
17 return 1.0;
18 } else if s1.len() == 0 || s2.len() == 0 {
19 return 0.0;
20 }
21
22 let search_range = ((max_len / 2) - 1).max(0); let mut flags_s1 = vec![false; s1.len()];
25 let mut flags_s2 = vec![false; s2.len()];
26
27 let mut common_chars = 0;
29
30 for i in 0..s1.len() {
31 let low = if i > search_range {
32 i - search_range
33 } else {
34 0
35 };
36 let high = (i + search_range).min(s2.len() - 1);
37 for j in low..=high {
38 if flags_s2[j] == false && s2[j] == s1[i] {
39 flags_s1[i] = true;
40 flags_s2[j] = true;
41 common_chars += 1;
42 break;
43 }
44 }
45 }
46
47 if common_chars == 0 {
48 return 0.0;
49 }
50
51 let mut trans_count = 0;
53 let mut k = 0;
54
55 for i in 0..s1.len() {
56 if flags_s1[i] == true {
57 let mut pivot = k;
58 for j in k..s2.len() {
59 if flags_s2[j] == true {
60 k = j + 1;
61 pivot = j;
62 break;
63 }
64 }
65 if s1[i] != s2[pivot] {
66 trans_count += 1;
67 }
68 }
69 }
70
71 trans_count /= 2;
72 return ((common_chars as f64) / (s1.len() as f64)
73 + (common_chars as f64) / (s2.len() as f64)
74 + ((common_chars - trans_count) as f64) / (common_chars as f64))
75 / 3.0;
76 }
77}
78
79impl StrSim<Vec<char>> for Jaro {
80 fn similarity_pre_tok2(
81 &self,
82 tokenized_key: &Vec<char>,
83 tokenized_query: &Vec<char>,
84 ) -> Result<f64, StrSimError> {
85 Ok(Jaro::similarity(tokenized_key, tokenized_query))
86 }
87}
88
89impl ExpectTokenizerType for Jaro {
90 fn get_expected_tokenizer_type(&self) -> TokenizerType {
91 TokenizerType::Seq(Box::new(None))
92 }
93}