spell_checker/
lib.rs

1// language: rust
2// Path: spell-checker/src/main.rs
3use std::collections::HashSet;
4use std::io::{BufReader, BufRead};
5use std::fs::File;
6use std::path::Path;
7
/// Dictionary-backed spell checker that tallies the words and misspellings
/// it encounters while scanning text files.
#[derive(Debug)]
pub struct Checker {
    /// Number of words `check_file` has reported as misspelled so far.
    pub misspellings: u32,
    /// Number of words `check_file` has looked up so far.
    pub text_words: u32,
    /// Lowercased dictionary entries, one per non-blank dictionary line.
    dictionary_words: HashSet<String>,
}

impl Checker {
    /// Builds a checker from the dictionary file at `path` (one word per line).
    ///
    /// Every line is trimmed and lowercased before it is stored; lines that
    /// are empty after trimming are ignored.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened or read, or if it yields no words.
    pub fn new(path: &Path) -> Self {
        let file = File::open(path).unwrap_or_else(|err| {
            panic!("Could not open dictionary {}: {}", path.display(), err)
        });

        let mut words = HashSet::new();
        for line in BufReader::new(file).lines() {
            let line = line.unwrap_or_else(|err| {
                panic!("Could not read dictionary {}: {}", path.display(), err)
            });
            // Trim before the emptiness test so whitespace-only lines do not
            // register the empty string as a valid word.
            let entry = line.trim();
            if !entry.is_empty() {
                words.insert(entry.to_lowercase());
            }
        }

        // An empty dictionary would flag every word, so treat it as a load failure.
        if words.is_empty() {
            panic!("Could not load dictionary.");
        }

        Checker {
            misspellings: 0,
            text_words: 0,
            dictionary_words: words,
        }
    }

    /// Returns the number of words held in the dictionary.
    pub fn len(&self) -> usize {
        self.dictionary_words.len()
    }

    /// Returns `true` if the dictionary holds no words.
    pub fn is_empty(&self) -> bool {
        self.dictionary_words.is_empty()
    }

    /// Returns `true` if the lowercased form of `word` is in the dictionary.
    fn check(&self, word: &str) -> bool {
        self.dictionary_words.contains(&word.to_lowercase())
    }

    /// Spell-checks the text file at `path`, printing each misspelled word on
    /// its own line and adding to `text_words` and `misspellings`.
    ///
    /// The file is scanned line by line. Consecutive word characters (see
    /// `is_word_char`) form a token; any other character, or the end of the
    /// line, ends the token and hands it to `tally_token`.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened or read.
    pub fn check_file(&mut self, path: &Path) {
        let file = File::open(path)
            .unwrap_or_else(|err| panic!("Could not open text {}: {}", path.display(), err));

        for line in BufReader::new(file).lines() {
            let line = line
                .unwrap_or_else(|err| panic!("Could not read text {}: {}", path.display(), err));

            let mut token = String::new();
            for c in line.chars() {
                if Self::is_word_char(c) {
                    token.push(c);
                } else {
                    self.tally_token(&token);
                    // Always start afresh after a delimiter, even when the
                    // token was skipped; otherwise a token such as "'tis"
                    // would absorb every following word on the line.
                    token.clear();
                }
            }
            // The last token of a line has no trailing delimiter.
            self.tally_token(&token);
        }
    }

    /// Returns `true` for characters that may appear inside a word:
    /// alphanumeric characters that are not numeric, apostrophes and hyphens.
    fn is_word_char(c: char) -> bool {
        (c.is_alphanumeric() || c == '\'' || c == '-') && !c.is_numeric()
    }

    /// Looks up one scanned token and updates the counters.
    ///
    /// Empty tokens and tokens that begin with an apostrophe or hyphen are
    /// skipped. A token containing `--` is split on it and each non-empty
    /// piece is counted and checked as a separate word, so `text_words` and
    /// `misspellings` are tallied over the same units.
    fn tally_token(&mut self, token: &str) {
        if token.is_empty() || token.starts_with('\'') || token.starts_with('-') {
            return;
        }
        // `split` yields empty pieces for a leading/trailing/repeated "--";
        // those are not words and must not be reported as misspellings.
        for piece in token.split("--").filter(|piece| !piece.is_empty()) {
            self.text_words += 1;
            if !self.check(piece) {
                self.misspellings += 1;
                println!("{}", piece);
            }
        }
    }
}
101
102
#[cfg(test)]
mod tests {
    use super::Checker;
    use std::path::Path;
    use std::time::Instant;

    /// Dictionary file loaded by every test in this module.
    const DICTIONARY_FILE: &str = "src/data/dictionary.txt";

    /// Seconds elapsed since `start`, derived from whole milliseconds.
    fn seconds_since(start: Instant) -> f64 {
        start.elapsed().as_millis() as f64 / 1000.
    }

    /// Loads the dictionary, spell-checks `text_path`, and prints the time
    /// spent in each phase followed by the resulting counters.
    fn run_report(text_path: &str) {
        let dictionary_file: &Path = Path::new(DICTIONARY_FILE);
        let text_file: &Path = Path::new(text_path);

        // Phase 1: dictionary load.
        let load_start = Instant::now();
        let mut dictionary = Checker::new(dictionary_file);
        println!("TIME IN LOAD: {:.2}", seconds_since(load_start));

        // Phase 2: dictionary size query (result intentionally unused; only timed).
        let length_start = Instant::now();
        dictionary.len();
        println!("TIME IN LENGTH: {:.2}", seconds_since(length_start));

        // Phase 3: scan the text; misspelled words are printed under the banner.
        let check_start = Instant::now();
        println!("MISSPELLED WORDS");
        println!("-----------------");
        dictionary.check_file(text_file);
        println!("TIME IN CHECK: {:.2}", seconds_since(check_start));

        println!("WORDS IN MISSPELLED: {}", dictionary.misspellings);
        println!("WORDS IN DICTIONARY: {}", dictionary.len());
        println!("WORDS IN TEXT: {}", dictionary.text_words);
    }

    #[test]
    fn test_war_and_peace() {
        run_report("src/data/wap.txt");
    }

    #[test]
    fn test_alice() {
        run_report("src/data/alice.txt");
    }

    #[test]
    fn test_bible() {
        run_report("src/data/bible.txt");
    }
}