correct_word/
lib.rs

1pub mod levenshtein;
2
/// # Struct: Correct Word
/// The result returned by the `correct_word` function.
///
/// It carries the chosen word together with the confidence of the correction.
/// The common traits are derived so that results can be debug-printed, cloned
/// and compared in assertions.
#[derive(Debug, Clone, PartialEq)]
pub struct CorrectWord {
    /// The corrected word, or `None` when no option was similar enough to the
    /// input for the given threshold.
    pub word: Option<String>,
    /// The similarity between the input and the best option, between 0 and 1.
    /// The higher the similarity, the better the correction.
    pub confidence: f64,
}
13
/// # Enum: Algorithm
/// The algorithms that can be used to correct a word.
///
/// Currently, the following algorithms are supported:
/// * Levenshtein: A simple algorithm that calculates the distance between two strings. The lower the distance, the better the correction.
///
/// The enum is a plain selector without data, so it derives `Copy` (callers can
/// reuse the same value across calls) along with `Debug`, `PartialEq` and `Eq`.
///
/// # Example
/// This enum is used as an argument to the correct function.
/// ```
/// use correct_word::correct_word;
/// use correct_word::Algorithm;
///
/// let result = correct_word(Algorithm::Levenshtein, "hilo".to_string(), vec!["hello".to_string(), "world".to_string()], None);
/// assert_eq!(result.word.unwrap(), "hello");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Algorithm {
    /// Rank the options using the Levenshtein algorithm.
    Levenshtein,
}
32
33/// Correct a word from a list of options.
34/// Takes in a word and a list of options, and returns the best option.
35///
36/// # Arguments
37/// * `algorithm` - The algorithm to use to correct the word. The algorithm is an enum, as defined in the [Algorithm](enum.Algorithm.html) enum.
38/// * `input` - The word to correct.
39/// * `options` - A list of options to correct the word to.
40/// * `threshold` - The maximum distance between the input and the corrected word. If the distance is greater than the threshold, the function will return None.
41///
42/// # Returns
43///
44/// `[CorrectWord](type.CorrectWord.html) = (Option<String>, f64))`
45///
46/// This function returns a tuple of an optional string and a f64. The string is the corrected word, and the f64 is the similary between the input and the corrected word in the range from 0 to 1
47///
48/// # Example
49/// ```
50/// use correct_word::correct_word;
51/// use correct_word::Algorithm;
52///
53/// let result = correct_word(Algorithm::Levenshtein, "hilo".to_string(), vec!["hello".to_string(), "world".to_string()], Some(0.4));
54/// assert_eq!(result.word.unwrap(), "hello");
55/// ```
56///
57/// # Note
58///
59/// All the algorithms used in this crate return a distance between the input and the corrected word.
60/// The lower the distance, the better the correction.
61/// You can use the threshold to make sure that the function doesn't return a word that is too different from the input.
62/// By default, the threshold is 0.5.
63/// So, it is up to you to choose the level of tolerance you want.
64/// Usually, a threshold of 0.5 is a good value which is the default.
65pub fn correct_word(
66    algorithm: Algorithm,
67    input: String,
68    options: Vec<String>,
69    threshold: Option<f64>,
70) -> CorrectWord {
71    let mut best = String::new();
72    let mut best_now = 0.0;
73    options.iter().for_each(|option| {
74        let distance = match algorithm {
75            Algorithm::Levenshtein => {
76                levenshtein::levenshtein_similarity(input.to_string(), option.to_string())
77            }
78        };
79        if distance > best_now {
80            best = option.to_string();
81            best_now = distance;
82        }
83    });
84
85    if best_now < threshold.unwrap_or(0.5) {
86        CorrectWord {
87            word: None,
88            confidence: best_now,
89        }
90    } else {
91        CorrectWord {
92            word: Some(best),
93            confidence: best_now,
94        }
95    }
96}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// With the default threshold (`None`), the input "he" is expected to be
    /// corrected to "hi" rather than to "hello" or "world".
    #[test]
    fn levenshtein_test() {
        let candidates = vec![
            String::from("hello"),
            String::from("world"),
            String::from("hi"),
        ];

        let corrected = correct_word(
            Algorithm::Levenshtein,
            String::from("he"),
            candidates,
            None,
        );

        assert_eq!(corrected.word.unwrap(), "hi");
    }
}
112}