correct_word/lib.rs
1pub mod levenshtein;
2
/// # Struct: Correct Word
/// The result returned by [`correct_word`].
///
/// It pairs the chosen correction (if any) with the similarity score of the
/// best candidate that was found.
///
/// The type derives `Debug`, `Clone` and `PartialEq` so results can be logged,
/// copied and compared in assertions. (`Eq` is not derivable because of the
/// `f64` field.)
#[derive(Debug, Clone, PartialEq)]
pub struct CorrectWord {
    /// The corrected word, or `None` when the best candidate's similarity is
    /// below the threshold passed to [`correct_word`].
    pub word: Option<String>,
    /// Similarity between the input and the best candidate; higher is better.
    /// Intended to lie between 0 and 1.
    pub confidence: f64,
}
13
/// # Enum: Algorithm
/// This enum defines the algorithms that can be used to correct a word.
///
/// Currently, the following algorithms are supported:
/// * Levenshtein: Scores each candidate with the similarity function from the
///   `levenshtein` module. The higher the similarity, the better the correction.
///
/// The enum is a plain, field-less tag, so it derives `Copy` along with the
/// usual comparison, hashing and debug traits.
///
/// # Example
/// This enum is used as an argument to the correct function.
/// ```
/// use correct_word::correct_word;
/// use correct_word::Algorithm;
///
/// let result = correct_word(Algorithm::Levenshtein, "hilo".to_string(), vec!["hello".to_string(), "world".to_string()], None);
/// assert_eq!(result.word.unwrap(), "hello");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Algorithm {
    Levenshtein,
}
32
33/// Correct a word from a list of options.
34/// Takes in a word and a list of options, and returns the best option.
35///
36/// # Arguments
37/// * `algorithm` - The algorithm to use to correct the word. The algorithm is an enum, as defined in the [Algorithm](enum.Algorithm.html) enum.
38/// * `input` - The word to correct.
39/// * `options` - A list of options to correct the word to.
40/// * `threshold` - The maximum distance between the input and the corrected word. If the distance is greater than the threshold, the function will return None.
41///
42/// # Returns
43///
44/// `[CorrectWord](type.CorrectWord.html) = (Option<String>, f64))`
45///
46/// This function returns a tuple of an optional string and a f64. The string is the corrected word, and the f64 is the similary between the input and the corrected word in the range from 0 to 1
47///
48/// # Example
49/// ```
50/// use correct_word::correct_word;
51/// use correct_word::Algorithm;
52///
53/// let result = correct_word(Algorithm::Levenshtein, "hilo".to_string(), vec!["hello".to_string(), "world".to_string()], Some(0.4));
54/// assert_eq!(result.word.unwrap(), "hello");
55/// ```
56///
57/// # Note
58///
59/// All the algorithms used in this crate return a distance between the input and the corrected word.
60/// The lower the distance, the better the correction.
61/// You can use the threshold to make sure that the function doesn't return a word that is too different from the input.
62/// By default, the threshold is 0.5.
63/// So, it is up to you to choose the level of tolerance you want.
64/// Usually, a threshold of 0.5 is a good value which is the default.
65pub fn correct_word(
66 algorithm: Algorithm,
67 input: String,
68 options: Vec<String>,
69 threshold: Option<f64>,
70) -> CorrectWord {
71 let mut best = String::new();
72 let mut best_now = 0.0;
73 options.iter().for_each(|option| {
74 let distance = match algorithm {
75 Algorithm::Levenshtein => {
76 levenshtein::levenshtein_similarity(input.to_string(), option.to_string())
77 }
78 };
79 if distance > best_now {
80 best = option.to_string();
81 best_now = distance;
82 }
83 });
84
85 if best_now < threshold.unwrap_or(0.5) {
86 CorrectWord {
87 word: None,
88 confidence: best_now,
89 }
90 } else {
91 CorrectWord {
92 word: Some(best),
93 confidence: best_now,
94 }
95 }
96}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// With the default threshold, "he" should be corrected to "hi" rather
    /// than to either of the longer candidates.
    #[test]
    fn levenshtein_test() {
        let candidates: Vec<String> = ["hello", "world", "hi"]
            .iter()
            .map(|candidate| candidate.to_string())
            .collect();

        let corrected = correct_word(
            Algorithm::Levenshtein,
            String::from("he"),
            candidates,
            None,
        );

        assert_eq!(corrected.word.as_deref(), Some("hi"));
    }
}
112}