magic_string_search/
lib.rs1use std::cmp::max;
2
3fn lcs_collected(a: &str, b: &str, n: usize, m: usize, dp: &mut Vec<Vec<i32>>) -> Vec<char> {
4 let mut i = n;
5 let mut j = m;
6 let mut result = Vec::new();
7 while i > 0 && j > 0 {
8 if a.chars().nth(i - 1).unwrap() == b.chars().nth(j - 1).unwrap() {
9 result.push(a.chars().nth(i - 1).unwrap());
10 i -= 1;
11 j -= 1;
12 } else if dp[i - 1][j] > dp[i][j - 1] {
13 i -= 1;
14 } else {
15 j -= 1;
16 }
17 }
18 result.reverse();
19 result
20}
21
22fn create_dp(a: &str, b: &str) -> Vec<Vec<i32>> {
23 let n = a.len();
24 let m = b.len();
25 let mut dp = vec![vec![0; m + 1]; n + 1];
26 for i in 1..=n {
27 for j in 1..=m {
28 if a.chars().nth(i - 1).unwrap() == b.chars().nth(j - 1).unwrap() {
29 dp[i][j] = dp[i - 1][j - 1] + 1;
30 } else {
31 dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
32 }
33 }
34 }
35 dp
36}
37
38fn normalized_len_compare(subject: &str, sequence: &str) -> f64 {
39 let min_window = sequence.len();
40 let mut window = min_window;
41
42 if min_window > subject.len() || min_window == 0{
43 return 0.0;
44 }
45
46 while window <= subject.len() {
47 let mut i = 0;
48 while i + window <= subject.len() {
49 let sub = &subject[i..i + window];
50 let n = sub.len();
51 let m = sequence.len();
52 let mut dp = create_dp(sub, sequence);
53 let lcs = lcs_collected(sub, sequence, n, m, &mut dp);
54 if lcs.len() == sequence.len() {
55 return min_window as f64 / window as f64;
56 }
57 i += 1;
58 }
59 window += 1;
60 }
61 return 0.0;
62}
63
64pub fn compare(str1: &str, q: &str) -> f64 {
82 let len1 = str1.len();
83 let len2 = q.len();
84 if len1 == 0 || len2 == 0 {
85 return 0.0;
86 }
87
88 let mut dp = create_dp(str1, q);
89 let lcs = lcs_collected(str1, q, len1, len2, &mut dp).
90 iter().collect::<String>();
91
92 if lcs.len() <= 1 {
93 return 0.0;
94 }
95
96 lcs.len() as f64 * (normalized_len_compare(str1, &lcs))
97}
98
99pub fn rank<'a>(query: &str, subjects: Vec<&'a str>) -> Vec<(f64, &'a str)> {
119 let mut result = subjects
120 .iter()
121 .map(|&subject| (compare(subject, query), subject))
122 .collect::<Vec<(f64, &str)>>();
123 result.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
124 result
125}
126
127pub fn struct_rank<'a, T: Clone>(
157 query: &str,
158 subjects: Vec<T>,
159 accessor: fn(&T) -> String,
160) -> Vec<(f64, T)> {
161 let mut result = subjects
162 .iter()
163 .map(|subject| (compare(&accessor(subject), query), subject.clone()))
164 .collect::<Vec<(f64, T)>>();
165 result.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
166 result
167}
168
169#[cfg(test)]
170mod tests {
171 use super::*;
172
173 #[test]
174 fn test_lcs_collected() {
175 let a = "AGGTAB";
176 let b = "GXTXAYB";
177 let n = a.len();
178 let m = b.len();
179 let mut dp = create_dp(a, b);
180 let result = lcs_collected(a, b, n, m, &mut dp);
181 assert_eq!(result, vec!['G', 'T', 'A', 'B']);
182 }
183
184 #[test]
185 fn test_lcs_collected_with_no_match() {
186 let a = "AGGTAB";
187 let b = "XYZ";
188 let n = a.len();
189 let m = b.len();
190 let mut dp = create_dp(a, b);
191 let result = lcs_collected(a, b, n, m, &mut dp);
192 assert_eq!(result, Vec::new());
193 }
194
195 #[test]
196 fn test_create_dp() {
197 let a = "AGGTAB";
198 let b = "GXTXAYB";
199 let dp = create_dp(a, b);
200 assert_eq!(dp[6][7], 4);
201 }
202
203 #[test]
204 fn test_normalized_len_compare_with_perfect_match() {
205 let subject = "AGGTAB";
206 let sequence = "AGGTAB";
207 let result = normalized_len_compare(subject, sequence);
208 assert_eq!(result, 1.0);
209 }
210
211 #[test]
212 fn test_normalized_len_compare_with_no_match() {
213 let subject = "AGGTAB";
214 let sequence = "GXT";
215 let result = normalized_len_compare(subject, sequence);
216 assert_eq!(result, 0.0);
217 }
218
219 #[test]
220 fn test_normalized_len_compare_with_partial_match() {
221 let subject = "AGGTAB";
222 let sequence = "GT";
223 let result = normalized_len_compare(subject, sequence);
224 assert_eq!(result, 1.0);
225 }
226
227 #[test]
228 fn test_normalized_len_compare_with_spread_out_match() {
229 let subject = "AGGTAB";
230 let sequence = "GA";
231 let result = normalized_len_compare(subject, sequence);
232 assert_eq!(result, (2.0 / 3.0));
233 }
234
235 #[test]
236 fn test_compare_with_a_perfect_match() {
237 let str1 = "AGGTAB";
238 let q = "AGGTAB";
239 let result = compare(str1, q);
240 assert_eq!(result, 6.0);
241 }
242
243 #[test]
244 fn test_compare_with_partial_match() {
245 let str1 = "Ad_Fields";
246 let q = "Ad";
247 let result = compare(str1, q);
248 assert_eq!(result, 2.0);
249 }
250
251 #[test]
252 fn test_compare_with_no_match() {
253 let str1 = "AGGTAB";
254 let q = "XYZ";
255 let result = compare(str1, q);
256 assert_eq!(result, 0.0);
257 }
258
259 #[test]
260 fn test_compare_with_spread_out_match() {
261 let str1 = "AGGTAB";
262 let q = "GA";
263 let result = compare(str1, q);
264 assert_eq!(result, 2.0 / 3.0 * 2.0);
265 }
266
267 #[test]
268 fn test_rank() {
269 let query = "Ad";
270 let subjects = vec!["Ad_Fields", "Users", "Aged_groups"];
271 let result = rank(query, subjects);
272 assert_eq!(
273 result,
274 vec![(2.0, "Ad_Fields"), (1.0, "Aged_groups"), (0.0, "Users"),]
275 );
276 }
277}