word_dictionary/
lib.rs

1/*!
2# Word Dictionary
3
4This crate provides a data structure for word mapping. It can be used for language translation.
5
6## Examples
7
8```rust
9use word_dictionary::Dictionary;
10
11let mut dictionary = Dictionary::new("tests/data/dictionary.txt"); // input a dictionary file
12
13// dictionary.read_data().unwrap(); // if the dictionary file already exists
14
15dictionary.add_edit("Althasol", "阿爾瑟索").unwrap();
16dictionary.add_edit("Aldun", "奧爾敦").unwrap();
17dictionary.add_edit("Alduin", "阿爾杜因").unwrap();
18dictionary.add_edit("Alduin", "奥杜因").unwrap();
19
20assert_eq!("阿爾瑟索", dictionary.get_right(dictionary.find_left_strictly("Althasol", 0).unwrap()).unwrap());
21assert_eq!("奧爾敦", dictionary.get_right(dictionary.find_left("dun", 0).unwrap()).unwrap());
22assert_eq!("奥杜因", dictionary.get_right(dictionary.find_left("Alduin", 0).unwrap()).unwrap());
23assert_eq!("阿爾杜因 --> 奥杜因", dictionary.get_all_right_to_string(dictionary.find_left("Alduin", 0).unwrap()).unwrap());
24
25// The dictionary file now would be
26/*
27Alduin = 阿爾杜因 --> 奥杜因
28Aldun = 奧爾敦
29Althasol = 阿爾瑟索
30*/
31```
32*/
33
34use std::{
35    fs::File,
36    io::{BufRead, BufReader, ErrorKind, Write},
37    path::PathBuf,
38};
39
40mod errors;
41
42pub use errors::*;
43use trim_in_place::TrimInPlace;
44
/// A word mapping kept in memory and associated with a dictionary file.
///
/// `left` and `right` are parallel vectors: the entry at index `i` of `left` is a left word
/// and the entry at index `i` of `right` holds all right words recorded for it, in stored
/// order. Both vectors are expected to always have the same length.
#[derive(Debug)]
pub struct Dictionary {
    /// The path of the dictionary file.
    path:  PathBuf,
    /// Left data: one left word per entry.
    left:  Vec<String>,
    /// Right data: for each entry, every right word recorded for the left word at the same index.
    right: Vec<Vec<String>>,
}
54
55impl Dictionary {
56    /// Create a new `Dictionary` instance. But not read the file data. Use the `read_data` method to read data file the input file.
57    #[inline]
58    pub fn new<P: Into<PathBuf>>(path: P) -> Dictionary {
59        Dictionary {
60            path: path.into(), left: Vec::new(), right: Vec::new()
61        }
62    }
63}
64
65impl Dictionary {
66    /// Get the count of words.
67    #[inline]
68    pub fn count(&self) -> usize {
69        debug_assert_eq!(self.left.len(), self.right.len());
70
71        self.left.len()
72    }
73
74    /// Get the all right words.
75    #[inline]
76    pub fn get_all_right(&self, index: usize) -> Option<&[String]> {
77        self.right.get(index).map(|v| v.as_slice())
78    }
79
80    /// Get the all right words.
81    #[inline]
82    pub fn get_all_right_to_string(&self, index: usize) -> Option<String> {
83        self.right.get(index).map(|v| v.join(" --> "))
84    }
85
86    /// Get the last right word at a specific index.
87    #[inline]
88    pub fn get_right(&self, index: usize) -> Option<&str> {
89        match self.right.get(index) {
90            Some(v) => v.last().map(|s| s.as_str()),
91            None => None,
92        }
93    }
94
95    /// Get the left word at a specific index
96    #[inline]
97    pub fn get_left(&self, index: usize) -> Option<&str> {
98        self.left.get(index).map(|s| s.as_str())
99    }
100}
101
102impl Dictionary {
103    /// Find a word by a keyword.
104    #[inline]
105    pub fn find_left_strictly<S: AsRef<str>>(&self, s: S, mut start_index: usize) -> Option<usize> {
106        let size = self.count();
107
108        if size == 0 {
109            return None;
110        }
111
112        start_index %= size;
113
114        let s = s.as_ref();
115
116        for _ in 0..size {
117            let tmp = &self.left[start_index];
118
119            if tmp.eq_ignore_ascii_case(s) {
120                return Some(start_index);
121            }
122
123            start_index += 1;
124
125            if start_index == size {
126                start_index = 0;
127            }
128        }
129
130        None
131    }
132
133    /// Find a word by a keyword.
134    #[inline]
135    pub fn find_left<S: AsRef<str>>(&self, s: S, mut start_index: usize) -> Option<usize> {
136        let size = self.count();
137
138        if size == 0 {
139            return None;
140        }
141
142        start_index %= size;
143
144        let s = s.as_ref();
145
146        let s_upper_case = s.to_uppercase();
147        let s_lower_case = s.to_lowercase();
148
149        for _ in 0..size {
150            let tmp = &self.left[start_index];
151
152            let tmp_upper_case = tmp.to_uppercase();
153
154            if tmp_upper_case.contains(&s_upper_case) {
155                return Some(start_index);
156            }
157
158            let tmp_lower_case = tmp.to_lowercase();
159
160            if tmp_lower_case.contains(&s_lower_case) {
161                return Some(start_index);
162            }
163
164            start_index += 1;
165
166            if start_index == size {
167                start_index = 0;
168            }
169        }
170
171        None
172    }
173
174    /// Find a word by a keyword.
175    #[inline]
176    pub fn find_right_strictly<S: AsRef<str>>(
177        &self,
178        s: S,
179        mut start_index: usize,
180    ) -> Option<usize> {
181        let size = self.count();
182
183        if size == 0 {
184            return None;
185        }
186
187        start_index %= size;
188
189        let s = s.as_ref();
190
191        for _ in 0..size {
192            for tmp in self.right[start_index].iter().rev() {
193                if tmp.eq_ignore_ascii_case(s) {
194                    return Some(start_index);
195                }
196            }
197
198            start_index += 1;
199
200            if start_index == size {
201                start_index = 0;
202            }
203        }
204
205        None
206    }
207
208    /// Find a word by a keyword.
209    #[inline]
210    pub fn find_right<S: AsRef<str>>(&self, s: S, mut start_index: usize) -> Option<usize> {
211        let size = self.count();
212
213        if size == 0 {
214            return None;
215        }
216
217        start_index %= size;
218
219        let s = s.as_ref();
220
221        let s_upper_case = s.to_uppercase();
222        let s_lower_case = s.to_lowercase();
223
224        for _ in 0..size {
225            for tmp in self.right[start_index].iter().rev() {
226                let tmp_upper_case = tmp.to_uppercase();
227
228                if tmp_upper_case.contains(&s_upper_case) {
229                    return Some(start_index);
230                }
231
232                let tmp_lower_case = tmp.to_lowercase();
233
234                if tmp_lower_case.contains(&s_lower_case) {
235                    return Some(start_index);
236                }
237            }
238
239            start_index += 1;
240
241            if start_index == size {
242                start_index = 0;
243            }
244        }
245
246        None
247    }
248}
249
250impl Dictionary {
251    /// Read the dictionary from the dictionary file.
252    pub fn read_data(&mut self) -> Result<(), ReadError> {
253        let file = match File::open(&self.path) {
254            Ok(file) => file,
255            Err(err) if err.kind() == ErrorKind::NotFound => {
256                // it is okay with a file not found error
257                return Ok(());
258            },
259            Err(err) => return Err(err.into()),
260        };
261
262        let mut reader = BufReader::new(file);
263
264        let mut buffer = String::new();
265
266        let mut line_counter = 1;
267
268        loop {
269            buffer.clear();
270
271            let c = reader.read_line(&mut buffer)?;
272
273            if c == 0 {
274                break;
275            }
276
277            buffer.trim_in_place();
278
279            if buffer.is_empty() {
280                continue;
281            }
282
283            let mut tokenizer = buffer.split('=');
284
285            let left_string = tokenizer.next().unwrap();
286
287            if left_string.contains("-->") {
288                return Err(ReadError::Broken {
289                    line:        line_counter,
290                    left_string: String::from(left_string),
291                    reason:      BrokenReason::BadLeftString,
292                });
293            }
294
295            let left_string = left_string.trim_end();
296
297            // the format of the left string has been checked
298
299            if let Some(index) = self.find_left_strictly(left_string, 0) {
300                return Err(ReadError::Broken {
301                    line:        line_counter,
302                    left_string: String::from(left_string),
303                    reason:      BrokenReason::Duplicated {
304                        another_left_string: String::from(self.left[index].as_str()),
305                    },
306                });
307            }
308
309            let right_string = match tokenizer.next() {
310                Some(right_string) => right_string,
311                None => {
312                    return Err(ReadError::Broken {
313                        line:        line_counter,
314                        left_string: String::from(left_string),
315                        reason:      BrokenReason::NoRightString,
316                    })
317                },
318            };
319
320            if tokenizer.next().is_some() {
321                return Err(ReadError::Broken {
322                    line:        line_counter,
323                    left_string: String::from(left_string),
324                    reason:      BrokenReason::BadRightString {
325                        right_string: String::from(right_string),
326                    },
327                });
328            }
329
330            let mut right_strings: Vec<String> = Vec::with_capacity(1);
331
332            for s in right_string.split("-->").map(|s| s.trim()) {
333                if s.is_empty() {
334                    return Err(ReadError::Broken {
335                        line:        line_counter,
336                        left_string: String::from(left_string),
337                        reason:      BrokenReason::BadRightString {
338                            right_string: String::from(right_string),
339                        },
340                    });
341                }
342
343                right_strings.push(String::from(s));
344            }
345
346            self.left.push(String::from(left_string));
347            self.right.push(right_strings);
348
349            line_counter += 1;
350        }
351
352        Ok(())
353    }
354}
355
356impl Dictionary {
357    /// Write this dictionary to its dictionary file.
358    pub fn write_data(&mut self) -> Result<(), WriteError> {
359        let mut file = File::create(&self.path)?;
360
361        let size = self.count();
362
363        if size > 0 {
364            let size_dec = size - 1;
365
366            // When doing exchange sort, it also writes data to file.
367            for i in 0..size_dec {
368                let mut left = self.left[i].to_uppercase();
369
370                for j in (i + 1)..size {
371                    let left_2 = self.left[j].to_uppercase();
372
373                    if left > left_2 {
374                        self.left.swap(i, j);
375
376                        self.right.swap(i, j);
377
378                        left = left_2;
379                    }
380                }
381
382                writeln!(file, "{} = {}", self.left[i], self.right[i].join(" --> "))?;
383            }
384
385            write!(file, "{} = {}", self.left[size_dec], self.right[size_dec].join(" --> "))?;
386        }
387
388        Ok(())
389    }
390
391    /// Delete a word.
392    #[inline]
393    pub fn delete(&mut self, index: usize) -> Result<bool, WriteError> {
394        if index < self.count() {
395            self.left.remove(index);
396            self.right.remove(index);
397
398            self.write_data()?;
399
400            Ok(true)
401        } else {
402            Ok(false)
403        }
404    }
405
406    /// Add or edit a word. If the left word exists, then update it, and return `Ok(false)`.
407    pub fn add_edit<L: AsRef<str>, R: AsRef<str>>(
408        &mut self,
409        left: L,
410        right: R,
411    ) -> Result<bool, WriteError> {
412        let left = left.as_ref().trim();
413        let right = right.as_ref().trim();
414
415        if left.contains("-->") || left.contains('=') {
416            Err(WriteError::BadLeftString)
417        } else if right.contains("-->") || right.contains('=') {
418            Err(WriteError::BadRightString)
419        } else if left == right {
420            Err(WriteError::Same)
421        } else if let Some(index) = self.find_left_strictly(left, 0) {
422            if self.get_right(index).unwrap() == right {
423                Err(WriteError::Duplicated)
424            } else {
425                self.right.get_mut(index).unwrap().push(String::from(right));
426
427                self.write_data()?;
428
429                Ok(false)
430            }
431        } else {
432            self.left.push(String::from(left));
433            self.right.push(vec![String::from(right)]);
434
435            self.write_data()?;
436
437            Ok(true)
438        }
439    }
440}