similar_string/
lib.rs

1#![warn(missing_docs)]
2
3//! # Similar String - the library for finding string similarities
4//! 
//! With this library you can easily find the similarity rating of two strings, or of one string against an array of strings.
//! Under the hood, the length-finding variant of the LCS (longest common subsequence) algorithm is used, with O(n * m) time complexity and O(min(n, m)) memory complexity.
7//! 
8//! # Example
9//! ```
10//! use similar_string::*;
11//! 
12//! // Compares similarity of two strings and returns similarity rating.
13//! // The rating is returned as a f64 value in range from 0.0 to 1.0.
14//! compare_similarity("age", "page"); // 0.75
15//! 
16//! let options = vec!["fill", "night", "ride"];
17//! 
18//! // The functions below return `None` if the provided slice is empty
19//! 
20//! // Finds the best match amongst the options
//! // and returns the match with its rating
22//! find_best_similarity("fight", &options); // Some(("night", 0.8))
23//! 
24//! // Returns all the similarity ratings
25//! // of the provided options
26//! get_similarity_ratings("fight", &options); // Some([0.4, 0.8, 0.2])
27//! ```
28//! 
29//! # LCS Algorithm
30//! 
//! You can also call `lcs_length`, which is used under the hood, to compute the length of the longest common subsequence.
32//! 
33//! ```
34//! use similar_string::lcs_length;
35//! 
36//! // The longest common subsequence in this case is "one"
37//! lcs_length("longest", "stone"); // 3
38//! ```
39
40use std::cmp::max;
41
/// Orders two strings by byte length and returns owned copies as `(shorter, longer)`.
///
/// When both strings have the same byte length, `right` is returned first.
#[inline]
fn get_shorter_longer_strings(left: impl AsRef<str>, right: impl AsRef<str>) -> (String, String) {
    let (left, right): (&str, &str) = (left.as_ref(), right.as_ref());
    if left.len() < right.len() {
        (left.to_string(), right.to_string())
    } else {
        (right.to_string(), left.to_string())
    }
}

/// Get length of the longest common subsequence
///
/// The strings are compared `char` by `char` (Unicode scalar values), so a
/// multi-byte character counts as a single element of the subsequence.
/// Two rows of the dynamic-programming table are kept, each one entry longer
/// than the number of `char`s in the string with the smaller byte length.
/// ```
/// use similar_string::lcs_length;
///
/// // The longest common subsequence in this case is "one"
/// assert_eq!(lcs_length("longest", "stone"), 3);
///
/// // Non-ASCII characters are counted once each
/// assert_eq!(lcs_length("naïve", "naive"), 4);
/// ```
pub fn lcs_length(left: impl AsRef<str>, right: impl AsRef<str>) -> usize {
    let (shorter, longer) = get_shorter_longer_strings(left, right);
    // The table is indexed by char position, so its width must come from the
    // char count. Sizing it by byte length leaves the final cell unwritten
    // whenever the string contains multi-byte characters.
    let shorter: Vec<char> = shorter.chars().collect();
    let width = shorter.len() + 1;
    let mut previous = vec![0usize; width];
    let mut current = vec![0usize; width];

    for long_char in longer.chars() {
        for (col, &short_char) in shorter.iter().enumerate() {
            current[col + 1] = if long_char == short_char {
                previous[col] + 1
            } else {
                max(previous[col + 1], current[col])
            };
        }
        // The finished row becomes `previous`. The stale row needs no reset:
        // cell 0 is never written (stays 0) and every other cell is
        // overwritten before it is read on the next pass.
        std::mem::swap(&mut previous, &mut current);
    }
    previous[width - 1]
}
75
76/// Get score of similarity of two certain strings
77/// # Example
78/// ```
79/// use similar_string::*;
80/// 
81/// // Compares similarity of two strings and returns similarity rating.
82/// // The rating is returned as a f64 value in range from 0.0 to 1.0.
83/// compare_similarity("age", "page"); // 0.75
84/// ```
85pub fn compare_similarity(left: impl AsRef<str>, right: impl AsRef<str>) -> f64 {
86    let (len1, len2) = (left.as_ref().len(), right.as_ref().len());
87    let lcs_len = lcs_length(left.as_ref(), right.as_ref());
88    let size = max(len1, len2);
89    // Empty strings should match
90    if size == 0 { 1.0 } else { lcs_len as f64 / size as f64 }
91}
92
93/// Find the string amongs the options that is the most similar to the target one
94/// 
95/// This function returns `None` if the provided options is an empty slice
96/// # Example
97/// ```
98/// use similar_string::*;
99/// 
100/// let options = vec!["fill", "night", "ride"];
101/// 
102/// // Finds the best match amongst the options
103/// // and returns match with it's rating
104/// find_best_similarity("fight", &options); // ("night", 0.8)
105/// ```
106pub fn find_best_similarity(taregt: impl AsRef<str>, options: &[impl AsRef<str>]) -> Option<(String, f64)> {
107    match options.len() {
108        0 => None,
109        _ => {
110            let mut high_score: f64 = -1.0;
111            let mut position: usize = 0;
112            for (index, option) in options.iter().enumerate() {
113                let score = compare_similarity(option.as_ref(), taregt.as_ref());
114                if score > high_score {
115                    high_score = score;
116                    position = index;
117                }
118            }
119            Some((options[position].as_ref().to_string(), high_score))
120        }
121    }
122}
123
124/// Get all similarity scores against the target string
125/// 
126/// This function returns `None` if the provided options is an empty slice
127/// # Example
128/// ```
129/// use similar_string::*;
130/// 
131/// let options = vec!["fill", "night", "ride"];
132/// 
133/// // Returns all the similarity ratings
134/// // of the provided options
135/// get_similarity_ratings("fight", &options); // [0.4, 0.8, 0.2]
136/// ```
137pub fn get_similarity_ratings(taregt: impl AsRef<str>, options: &[impl AsRef<str>]) -> Option<Vec<f64>> {
138    match options.len() {
139        0 => None,
140        _ => {
141            let mut result = vec![];
142            for option in options.iter() {
143                let score = compare_similarity(option.as_ref(), taregt.as_ref());
144                result.push(score);
145            }
146            Some(result)
147        }
148    }
149}
150
#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;

    use crate::*;

    // The longest common subsequence of "longest" and "stone" is "one".
    #[test]
    fn lcs_works() {
        let length = lcs_length("longest", "stone");
        assert_eq!(length, 3);
    }

    // A string compared with itself must get the maximum score.
    #[test]
    fn identity_check() {
        let score = compare_similarity("hello", "hello");
        assert_eq!(score, 1.0);
    }

    // Swapping the arguments must not change the score.
    #[test]
    fn ratio_is_symmetrical() {
        let left = "longest";
        let right = "stone";
        let score1 = compare_similarity(left, right);
        let score2 = compare_similarity(right, left);
        assert_eq!(score1, score2);
    }

    // Two empty strings are treated as a perfect match.
    #[test]
    fn empty_strings() {
        let score = compare_similarity("", "");
        assert_eq!(score, 1.0);
    }

    #[test]
    fn find_best() {
        let target = "fight";
        let options = vec!["blight", "night", "stride"];
        let (matched, score) = find_best_similarity(target, &options).unwrap();
        assert_eq!(matched, "night");
        assert_eq!(score, 0.8);
    }

    // Exercises an owned `String` target and options borrowed out of a set.
    #[test]
    fn find_best_with_set() {
        let target = String::from("fight");
        let mut options = BTreeSet::new();
        options.insert("blight");
        options.insert("night");
        options.insert("stride");
        let vector: Vec<_> = options.iter().collect();
        let (matched, score) = find_best_similarity(target, &vector).unwrap();
        assert_eq!(matched, "night");
        assert_eq!(score, 0.8);
    }

    // Ratings come back in the same order as the options.
    #[test]
    fn similarity_ratings() {
        let expected = vec![0.4, 0.8, 0.2];
        let options = vec!["fill", "night", "ride"];
        let ratings = get_similarity_ratings("fight", &options).unwrap();
        assert_eq!(expected, ratings);
    }
}