magic_string_search/
lib.rs

1use std::cmp::max;
2
3fn lcs_collected(a: &str, b: &str, n: usize, m: usize, dp: &mut Vec<Vec<i32>>) -> Vec<char> {
4    let mut i = n;
5    let mut j = m;
6    let mut result = Vec::new();
7    while i > 0 && j > 0 {
8        if a.chars().nth(i - 1).unwrap() == b.chars().nth(j - 1).unwrap() {
9            result.push(a.chars().nth(i - 1).unwrap());
10            i -= 1;
11            j -= 1;
12        } else if dp[i - 1][j] > dp[i][j - 1] {
13            i -= 1;
14        } else {
15            j -= 1;
16        }
17    }
18    result.reverse();
19    result
20}
21
22fn create_dp(a: &str, b: &str) -> Vec<Vec<i32>> {
23    let n = a.len();
24    let m = b.len();
25    let mut dp = vec![vec![0; m + 1]; n + 1];
26    for i in 1..=n {
27        for j in 1..=m {
28            if a.chars().nth(i - 1).unwrap() == b.chars().nth(j - 1).unwrap() {
29                dp[i][j] = dp[i - 1][j - 1] + 1;
30            } else {
31                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
32            }
33        }
34    }
35    dp
36}
37
38fn normalized_len_compare(subject: &str, sequence: &str) -> f64 {
39    let min_window = sequence.len();
40    let mut window = min_window;
41
42    if min_window > subject.len() || min_window == 0{
43        return 0.0;
44    }
45   
46    while window <= subject.len() {
47        let mut i = 0;
48        while i + window <= subject.len() {
49            let sub = &subject[i..i + window];
50            let n = sub.len();
51            let m = sequence.len();
52            let mut dp = create_dp(sub, sequence);
53            let lcs = lcs_collected(sub, sequence, n, m, &mut dp);
54            if lcs.len() == sequence.len() {
55                return min_window as f64 / window as f64;
56            }
57            i += 1;
58        }
59        window += 1;
60    }
61    return 0.0;
62}
63
64/// Compares two strings and returns a score based on the longest common subsequence (LCS) and normalized length comparison.
65/// 
66/// # Arguments
67///
68/// * `str1` - A reference to the first string.
69/// * `q` - A reference to the query string.
70///
71/// # Returns
72///
73/// * A floating-point score representing the similarity between `str1` and `q`.
74///
75/// # Examples
76///
77/// ```
78/// let score = compare("AGGTAB", "AGGTAB");
79/// assert_eq!(score, 6.0);
80/// ```
81pub fn compare(str1: &str, q: &str) -> f64 {
82    let len1 = str1.len();
83    let len2 = q.len();
84    if len1 == 0 || len2 == 0 {
85        return 0.0;
86    }
87
88    let mut dp = create_dp(str1, q);
89    let lcs = lcs_collected(str1, q, len1, len2, &mut dp).
90        iter().collect::<String>();
91
92    if lcs.len() <= 1 {
93        return 0.0;
94    }
95
96    lcs.len() as f64 * (normalized_len_compare(str1, &lcs))
97}
98
99/// Ranks a list of strings based on their similarity to a query string using the `compare` function.
100///
101/// # Arguments
102///
103/// * `query` - A reference to the query string.
104/// * `subjects` - A vector of references to the subject strings to be ranked.
105///
106/// # Returns
107///
108/// * A vector of tuples containing the similarity score and the corresponding subject string, sorted by score in descending order.
109///
110/// # Examples
111///
112/// ```
113/// let query = "Ad";
114/// let subjects = vec!["Ad_Fields", "Users", "Aged_groups"];
115/// let result = rank(query, subjects);
116/// assert_eq!(result, vec![(2.0, "Ad_Fields"), (1.0, "Aged_groups"), (0.0, "Users")]);
117/// ```
118pub fn rank<'a>(query: &str, subjects: Vec<&'a str>) -> Vec<(f64, &'a str)> {
119    let mut result = subjects
120        .iter()
121        .map(|&subject| (compare(subject, query), subject))
122        .collect::<Vec<(f64, &str)>>();
123    result.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
124    result
125}
126
127/// Ranks a list of structured objects based on their similarity to a query string using a custom accessor function.
128///
129/// # Arguments
130///
131/// * `query` - A reference to the query string.
132/// * `subjects` - A vector of structured objects to be ranked.
133/// * `accessor` - A function that takes a reference to a structured object and returns a string for comparison.
134///
135/// # Returns
136///
137/// * A vector of tuples containing the similarity score and the corresponding structured object, sorted by score in descending order.
138///
139/// # Examples
140///
141/// ```
142/// struct Person {
143///     name: String,
144/// }
145///
146/// let query = "Ad";
147/// let subjects = vec![
148///     Person { name: "Ad_Fields".to_string() },
149///     Person { name: "Users".to_string() },
150///     Person { name: "Aged_groups".to_string() },
151/// ];
152/// let result = struct_rank(query, subjects, |p| p.name.clone());
153/// assert_eq!(result[0].0, 2.0);
154/// assert_eq!(result[0].1.name, "Ad_Fields");
155/// ```
156pub fn struct_rank<'a, T: Clone>(
157    query: &str,
158    subjects: Vec<T>,
159    accessor: fn(&T) -> String,
160) -> Vec<(f64, T)> {
161    let mut result = subjects
162        .iter()
163        .map(|subject| (compare(&accessor(subject), query), subject.clone()))
164        .collect::<Vec<(f64, T)>>();
165    result.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
166    result
167}
168
169#[cfg(test)]
170mod tests {
171    use super::*;
172
173    #[test]
174    fn test_lcs_collected() {
175        let a = "AGGTAB";
176        let b = "GXTXAYB";
177        let n = a.len();
178        let m = b.len();
179        let mut dp = create_dp(a, b);
180        let result = lcs_collected(a, b, n, m, &mut dp);
181        assert_eq!(result, vec!['G', 'T', 'A', 'B']);
182    }
183
184    #[test]
185    fn test_lcs_collected_with_no_match() {
186        let a = "AGGTAB";
187        let b = "XYZ";
188        let n = a.len();
189        let m = b.len();
190        let mut dp = create_dp(a, b);
191        let result = lcs_collected(a, b, n, m, &mut dp);
192        assert_eq!(result, Vec::new());
193    }
194
195    #[test]
196    fn test_create_dp() {
197        let a = "AGGTAB";
198        let b = "GXTXAYB";
199        let dp = create_dp(a, b);
200        assert_eq!(dp[6][7], 4);
201    }
202
203    #[test]
204    fn test_normalized_len_compare_with_perfect_match() {
205        let subject = "AGGTAB";
206        let sequence = "AGGTAB";
207        let result = normalized_len_compare(subject, sequence);
208        assert_eq!(result, 1.0);
209    }
210
211    #[test]
212    fn test_normalized_len_compare_with_no_match() {
213        let subject = "AGGTAB";
214        let sequence = "GXT";
215        let result = normalized_len_compare(subject, sequence);
216        assert_eq!(result, 0.0);
217    }
218
219    #[test]
220    fn test_normalized_len_compare_with_partial_match() {
221        let subject = "AGGTAB";
222        let sequence = "GT";
223        let result = normalized_len_compare(subject, sequence);
224        assert_eq!(result, 1.0);
225    }
226
227    #[test]
228    fn test_normalized_len_compare_with_spread_out_match() {
229        let subject = "AGGTAB";
230        let sequence = "GA";
231        let result = normalized_len_compare(subject, sequence);
232        assert_eq!(result, (2.0 / 3.0));
233    }
234
235    #[test]
236    fn test_compare_with_a_perfect_match() {
237        let str1 = "AGGTAB";
238        let q = "AGGTAB";
239        let result = compare(str1, q);
240        assert_eq!(result, 6.0);
241    }
242
243    #[test]
244    fn test_compare_with_partial_match() {
245        let str1 = "Ad_Fields";
246        let q = "Ad";
247        let result = compare(str1, q);
248        assert_eq!(result, 2.0);
249    }
250
251    #[test]
252    fn test_compare_with_no_match() {
253        let str1 = "AGGTAB";
254        let q = "XYZ";
255        let result = compare(str1, q);
256        assert_eq!(result, 0.0);
257    }
258
259    #[test]
260    fn test_compare_with_spread_out_match() {
261        let str1 = "AGGTAB";
262        let q = "GA";
263        let result = compare(str1, q);
264        assert_eq!(result, 2.0 / 3.0 * 2.0);
265    }
266
267    #[test]
268    fn test_rank() {
269        let query = "Ad";
270        let subjects = vec!["Ad_Fields", "Users", "Aged_groups"];
271        let result = rank(query, subjects);
272        assert_eq!(
273            result,
274            vec![(2.0, "Ad_Fields"), (1.0, "Aged_groups"), (0.0, "Users"),]
275        );
276    }
277}