1use std::collections::{HashSet, HashMap};
5use std::path::Path;
6use std::fs::File;
8use diesel::RunQueryDsl;
11use diesel::prelude::*;
12
13use crate::attack::database::{Database, DatabaseSession, NewLanguage, NewWord};
14use crate::cipher::common::normalize_text;
15use crate::{Result, ErrorKind, ResultExt};
16use crate::schema::languages;
18use crate::schema::languages::dsl::*;
19use crate::schema::words;
20use crate::schema::words::dsl::*;
21use std::io::Read;
24use std::iter::FromIterator;
25
26
/// Handle to the words stored in the database for a single language.
///
/// Each instance keeps its own [`Database`] value; the queries issued by the
/// methods of this type run through that value's session.
pub struct Dictionary {
    /// Name of the language this dictionary refers to.
    pub language: String,
    /// Value compared against the `language_id` column when words are queried.
    language_id: i32,
    /// Database value whose session is handed out by `session()`.
    database: Database
}
39
40impl Dictionary {
41 pub fn remove_dictionary<T>(_language: T)-> Result<()>
48 where T: AsRef<str> {
49 let database = Database::new()?;
50 diesel::delete(languages::table.filter(language.eq(_language.as_ref())))
51 .execute(&database.session)
52 .chain_err(|| ErrorKind::DatabaseError(String::from("Error deleting language.")))?;
53 Ok(())
54 }
55
56 pub fn get_dictionaries_names()-> Result<Vec<String>> {
61 let database = Database::new()?;
62 let dictionaries_names = languages::table.select(languages::language)
63 .load::<String>(&database.session)
64 .chain_err(|| ErrorKind::DatabaseError(String::from("Language list could not be retrieved.")))?;
65 Ok(dictionaries_names)
66 }
67
68 pub fn new<T>(_language: T, create: bool)-> Result<Self>
75 where T: AsRef<str> {
76 let new_language = _language.as_ref().to_string();
77 let current_database = Database::new()?;
78 let mut current_dictionary = Dictionary {
79 language: new_language.clone(),
80 language_id: 0,
81 database:current_database
82 };
83 if current_dictionary.already_created() {
84 current_dictionary.language_id = languages::table.filter(language.eq(¤t_dictionary.language))
85 .select(languages::id)
86 .first::<i32>(current_dictionary.session())
87 .expect("Language that does not exists in database yet.");
88 } else {
89 if create {
90 current_dictionary.create_dictionary();
91 } else {
92 bail!(ErrorKind::NotExistingLanguage(new_language.clone()))
93 }
94 }
95 Ok(current_dictionary)
96 }
97
98 pub fn session(&self) -> &DatabaseSession {
100 &self.database.session
101 }
102
103 pub fn add_word<T>(&mut self, _word: T)
110 where T: AsRef<str> {
111 let new_word = NewWord {
112 word: _word.as_ref(),
113 word_pattern: get_word_pattern(_word.as_ref()),
114 language_id: self.language_id
115 };
116 diesel::insert_into(words::table)
117 .values(&new_word)
118 .execute(self.session())
119 .expect("Error saving new word.");
120 }
121
122 pub fn add_multiple_words(&mut self, _words: &HashSet<String>){
127 let mut word_list: Vec<NewWord> = Vec::new();
128 _words.iter().map(|new_word| {
129 let word_to_add = NewWord {
130 word: new_word,
131 word_pattern: get_word_pattern(new_word),
132 language_id: self.language_id,
133 };
134 word_list.push(word_to_add);
135 }).for_each(drop);
136 diesel::insert_into(words::table)
137 .values(&word_list)
138 .execute(self.session())
139 .expect("Error saving new word.");
140 }
141
142 pub fn remove_word<T>(&mut self, _word: T)
149 where T: AsRef<str> {
150 diesel::delete(words::table.filter(word.eq(_word.as_ref()).and(language_id.eq(&self.language_id))))
151 .execute(self.session())
152 .expect("Error deleting word");
153 }
154
155 pub fn word_exists<T>(&self, _word: T) -> bool
163 where T: AsRef<str> {
164 let _word_clone = _word.as_ref().clone();
165 if let Ok(count) = words::table.filter(word.eq(_word.as_ref()).and(language_id.eq(&self.language_id)))
166 .count()
167 .first::<i64>(self.session()) {
168 if count > 0 {true} else {false}
169 } else {
170 false
171 }
172 }
173
174 pub fn populate<T>(&mut self, file_pathname: T)-> Result<()>
179 where T: AsRef<Path> {
180 let _words = get_words_from_text_file(file_pathname.as_ref())?;
181 self.add_multiple_words(&_words);
182 Ok(())
183 }
184
185 fn already_created(&self)-> bool {
190 if let Ok(_) = languages::table.filter(language.eq(&self.language))
191 .select(languages::id)
192 .first::<i32>(self.session()) {
193 true
194 } else {
195 false
196 }
197 }
198
199 fn create_dictionary(&mut self) {
201 let new_language = NewLanguage {language: self.language.as_str()};
202 diesel::insert_into(languages::table)
203 .values(&new_language)
204 .execute(self.session())
205 .expect("Error saving new language.");
206 self.language_id = languages::table.filter(language.eq(&self.language))
207 .select(languages::id)
208 .first::<i32>(self.session())
209 .expect("Error getting newly created language id.");
210 }
211
212 pub fn get_words_with_pattern<T>(&self, pattern: T) -> Result<Vec<String>>
220 where T: AsRef<str> {
221 let words_result = words::table.filter(word_pattern.eq(pattern.as_ref()))
223 .select(word)
224 .get_results::<String>(self.session());
225 match words_result {
226 Ok(_words) => Ok(_words),
227 Err(e) => bail!(format!("{}",e))
228 }
229 }
230
231 pub fn get_words_presence(&self, _words: &HashSet<String>) -> f64 {
239 let total_words = _words.len();
240 let current_hits: usize = _words.iter()
241 .map(|_word| if self.word_exists(_word) { 1 } else { 0 })
242 .sum();
243 let presence: f64 = current_hits as f64 / total_words as f64;
244 presence
245 }
246
247 pub fn get_all_words(&self) -> Result<Vec<String>> {
249 let words_result = words::table
250 .filter(language_id.eq(self.language_id))
251 .select(word)
252 .get_results::<String>(self.session());
253 match words_result {
254 Ok(_words) => Ok(_words),
255 Err(e) => bail!(format!("{}",e))
256 }
257 }
258}
259
/// Build the pattern of a word.
///
/// Every distinct character is numbered, starting at 0, in the order in which
/// it first appears in the word. The pattern is the sequence of those numbers,
/// one per character of the word, joined with dots. For instance `"HGHHU"`
/// gives `"0.1.0.0.2"`.
///
/// # Parameters:
/// * _word: Word to get the pattern for.
///
/// # Returns:
/// The dot separated pattern; an empty word gives an empty string.
pub fn get_word_pattern<T>(_word: T) -> String
    where T: AsRef<str> {
    // Maps each character to the index it received on its first appearance.
    let mut first_seen: HashMap<char, usize> = HashMap::new();
    _word.as_ref().chars()
        .map(|letter| {
            // A character not seen before gets the next free index, which is
            // exactly the amount of distinct characters seen so far.
            let next_index = first_seen.len();
            first_seen.entry(letter).or_insert(next_index).to_string()
        })
        .collect::<Vec<String>>()
        .join(".")
}
297
/// Set that keeps its elements in the order in which they were first inserted.
struct InsertionOrderedSet<T> {
    /// Stored elements; a value appears here at most once.
    elements: Vec<T>
}

impl<T> InsertionOrderedSet<T> {

    /// Create an empty set.
    pub fn new() -> Self {
        InsertionOrderedSet { elements: Vec::new() }
    }

    /// Append `new_element` unless an equal element is already stored.
    pub fn insert(&mut self, new_element: T)
        where T: PartialEq {
        if self.contains(&new_element) {
            return;
        }
        self.elements.push(new_element);
    }

    /// Tell whether an element equal to `element_to_find` is stored.
    pub fn contains(&self, element_to_find: &T) -> bool
        where T: PartialEq {
        self.elements.iter().any(|stored| stored == element_to_find)
    }

    /// Iterate over the stored elements in insertion order.
    pub fn iter(&self) -> InsertionOrderedSetIterator<T> {
        InsertionOrderedSetIterator { set: self, index: 0 }
    }
}

/// Iterator over the elements of an `InsertionOrderedSet`.
struct InsertionOrderedSetIterator<'a, T: 'a> {
    /// Set being traversed.
    set: &'a InsertionOrderedSet<T>,
    /// Position of the element the next call to `next()` returns.
    index: usize
}

impl<'a, T> Iterator for InsertionOrderedSetIterator<'a, T> {
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item> {
        // Past the last element `get` gives `None`, which ends the iteration
        // without moving the index any further.
        let current = self.set.elements.get(self.index)?;
        self.index += 1;
        Some(current)
    }
}
365
366pub fn get_words_from_text_file<T>(file_pathname: T) -> Result<HashSet<String>>
374 where T: AsRef<Path> {
375 let mut file_content = String::new();
376 let mut file_to_read = File::open(file_pathname.as_ref())
377 .chain_err(|| ErrorKind::IOError(file_pathname.as_ref().to_string_lossy().to_string()))?;
378 file_to_read.read_to_string(&mut file_content)
379 .chain_err(|| ErrorKind::IOError(file_pathname.as_ref().to_string_lossy().to_string()))?;
380 let words_set = get_words_from_text(file_content);
381 Ok(words_set)
382}
383
384pub fn get_words_from_text<T>(text: T)-> HashSet<String>
395 where T: AsRef<str> {
396 let words_list = normalize_text(text);
397 let words_set = HashSet::from_iter(words_list.iter().cloned());
398 words_set
399}
400
/// Outcome of `identify_language`.
pub struct IdentifiedLanguage {
    /// Name of the language with the highest frequency, or `None` when no
    /// language scored above zero.
    pub(crate) winner: Option<String>,
    /// Frequency obtained by `winner`; `None` when there is no winner.
    pub(crate) winner_probability: Option<f64>,
    /// Frequency computed for every language, keyed by language name.
    candidates: HashMap<String, f64>
}
412
413pub fn identify_language<T>(text: T)-> Result<IdentifiedLanguage>
424 where T: AsRef<str> {
425 let _words = get_words_from_text(&text);
426 let candidates = get_candidates_frecuency(&_words)?;
427 if let Some(winner) = get_winner(&candidates){
428 let winner_probability = *(candidates.get(winner.as_str()).unwrap());
429 Ok(IdentifiedLanguage {
430 winner: Some(winner),
431 winner_probability: Some(winner_probability),
432 candidates
433 })
434 } else {
435 Ok(IdentifiedLanguage {
436 winner: None,
437 winner_probability: None,
438 candidates
439 })
440 }
441}
442
443fn get_candidates_frecuency(_words: &HashSet<String>)-> Result<HashMap<String, f64>> {
453 let total_words = _words.len();
454 let mut candidates: HashMap<String, f64> = HashMap::new();
455 for _language in Dictionary::get_dictionaries_names()? {
456 let dictionary = Dictionary::new(&_language, false)
457 .chain_err(|| ErrorKind::DatabaseError(String::from("Error opening language dictionary")))?;
458 let current_hits: u64 = _words.iter().map(|_word| if dictionary.word_exists(_word) {1} else {0}).sum();
459 let frequency = current_hits as f64 / total_words as f64;
460 candidates.insert(_language, frequency);
461 }
462 Ok(candidates)
463}
464
/// Return the name of the candidate with the highest frequency.
///
/// # Parameters:
/// * candidates: Map from language name to frequency.
///
/// # Returns:
/// The winning name, or `None` when no candidate has a frequency greater
/// than zero.
fn get_winner(candidates: &HashMap<String, f64>) -> Option<String> {
    let mut best: Option<(&String, f64)> = None;
    for (candidate_name, &frequency) in candidates {
        // Until a winner exists, a candidate must beat zero.
        let frequency_to_beat = best.map_or(0_f64, |(_, highest)| highest);
        if frequency > frequency_to_beat {
            best = Some((candidate_name, frequency));
        }
    }
    best.map(|(winner_name, _)| winner_name.clone())
}
482
483pub fn get_best_result(identified_languages: &Vec<Result<(usize, IdentifiedLanguage)>>)-> usize {
491 let mut current_best_key: usize = 0;
492 let mut current_best_key_probability: f64 = 0.0;
493 for result in identified_languages {
494 if let Ok((current_key, identified_language)) = result {
495 if let Some(_) = identified_language.winner {
496 if let Some(winner_probability) = identified_language.winner_probability {
497 if winner_probability > current_best_key_probability {
498 current_best_key = *current_key;
499 current_best_key_probability = winner_probability;
500 }
501 }
502 }
503 }
504 }
505 current_best_key
506}
507
508#[cfg(test)]
531pub mod tests {
532 use super::*;
542 use std::fs::{create_dir, File, OpenOptions, read_to_string};
543 use std::env;
544 use test_common::fs::ops::{copy_files};
545 use test_common::fs::tmp::TestEnvironment;
546 use test_common::system::env::TemporalEnvironmentVariable;
547 use rstest::*;
548 use std::ffi::OsString;
549 use std::path::{Path, PathBuf};
550 use std::io::{Write, BufReader, Read};
551 use crate::attack::database;
552 use std::env::temp_dir;
553
554
    /// File name given to the text file created by `TemporaryTextFile::new`.
    const TEXT_FILE_NAME: &'static str = "text_to_load.txt";
    /// Reference word sequence for the English excerpt: the words of
    /// `ENGLISH_TEXT_WITH_PUNCTUATIONS_MARKS` without punctuation marks and
    /// without its trailing number.
    const ENGLISH_TEXT_WITHOUT_PUNCTUATIONS_MARKS: &'static str = "This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever You may copy it give it away or
re use it under the terms of the Project Gutenberg License included
with this eBook or online at";
    /// English excerpt as it would be found in a book: punctuation marks, a
    /// hyphenated word and a trailing number.
    pub const ENGLISH_TEXT_WITH_PUNCTUATIONS_MARKS: &'static str = "This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever.You may copy it, give it away or
re-use it under the terms of the Project Gutenberg License included
with this eBook or online at 2020";
    /// Reference word sequence for the Spanish excerpt, without punctuation
    /// marks.
    const SPANISH_TEXT_WITHOUT_PUNCTUATIONS_MARKS: &'static str = "Todavía lo recuerdo como si aquello hubiera sucedido ayer llegó á las
puertas de la posada estudiando su aspecto afanosa y atentamente
seguido por su maleta que alguien conducía tras él en una carretilla de
mano Era un hombre alto fuerte pesado con un moreno pronunciado
color de avellana Su trenza ó coleta alquitranada le caía sobre los
hombros de su nada limpia blusa marina Sus manos callosas destrozadas
y llenas de cicatrices enseñaban las extremidades de unas uñas rotas y
negruzcas Y su rostro moreno llevaba en una mejilla aquella gran
cicatriz de sable sucia y de un color blanquizco lívido y repugnante
Todavía lo recuerdo paseando su mirada investigadora en torno del
cobertizo silbando mientras examinaba y prorrumpiendo en seguida en
aquella antigua canción marina que tan á menudo le oí cantar después";
    /// Spanish excerpt with its punctuation marks.
    const SPANISH_TEXT_WITH_PUNCTUATIONS_MARKS: &'static str = "Todavía lo recuerdo como si aquello hubiera sucedido ayer: llegó á las
puertas de la posada estudiando su aspecto, afanosa y atentamente,
seguido por su maleta que alguien conducía tras él en una carretilla de
mano. Era un hombre alto, fuerte, pesado, con un moreno pronunciado,
color de avellana. Su trenza ó coleta alquitranada le caía sobre los
hombros de su nada limpia blusa marina. Sus manos callosas, destrozadas
y llenas de cicatrices enseñaban las extremidades de unas uñas rotas y
negruzcas. Y su rostro moreno llevaba en una mejilla aquella gran
cicatriz de sable, sucia y de un color blanquizco, lívido y repugnante.
Todavía lo recuerdo, paseando su mirada investigadora en torno del
cobertizo, silbando mientras examinaba y prorrumpiendo, en seguida, en
aquella antigua canción marina que tan á menudo le oí cantar después:";
    /// Reference word sequence for the French excerpt: punctuation marks
    /// removed and apostrophes replaced by spaces.
    const FRENCH_TEXT_WITHOUT_PUNCTUATIONS_MARKS: &'static str = "Combien le lecteur tandis que commodément assis au coin de son feu
il s amuse à feuilleter les pages d un roman combien il se rend peu
compte des fatigues et des angoisses de l auteur Combien il néglige de
se représenter les longues nuits de luttes contre des phrases rétives
les séances de recherches dans les bibliothèques les correspondances
avec d érudits et illisibles professeurs allemands en un mot tout
l énorme échafaudage que l auteur a édifié et puis démoli simplement
pour lui procurer à lui lecteur quelques instants de distraction au
coin de son feu ou encore pour lui tempérer l ennui d une heure en
wagon";
    /// French excerpt with its punctuation marks and apostrophes.
    const FRENCH_TEXT_WITH_PUNCTUATIONS_MARKS: &'static str = "Combien le lecteur,--tandis que, commodément assis au coin de son feu,
il s'amuse à feuilleter les pages d'un roman,--combien il se rend peu
compte des fatigues et des angoisses de l'auteur! Combien il néglige de
se représenter les longues nuits de luttes contre des phrases rétives,
les séances de recherches dans les bibliothèques, les correspondances
avec d'érudits et illisibles professeurs allemands, en un mot tout
l'énorme échafaudage que l'auteur a édifié et puis démoli, simplement
pour lui procurer, à lui, lecteur, quelques instants de distraction au
coin de son feu, ou encore pour lui tempérer l'ennui d'une heure en
wagon!";
    /// Reference word sequence for the German excerpt: punctuation marks,
    /// quotation marks and the "17.." year fragment removed.
    const GERMAN_TEXT_WITHOUT_PUNCTUATIONS_MARKS: &'static str = "Da unser Gutsherr Mr Trelawney Dr Livesay und die übrigen Herren
mich baten alle Einzelheiten über die Schatzinsel von Anfang bis zu
Ende aufzuschreiben und nichts auszulassen als die Lage der Insel und
auch die nur weil noch ungehobene Schätze dort liegen nehme ich im
Jahre die Feder zur Hand und beginne bei der Zeit als mein Vater
noch den Gasthof Zum Admiral Benbow hielt und jener dunkle alte
Seemann mit dem Säbelhieb über der Wange unter unserem Dache Wohnung
nahm";
    /// German excerpt with its punctuation marks, typographic quotation marks
    /// and a partially written year.
    const GERMAN_TEXT_WITH_PUNCTUATIONS_MARKS: &'static str = "Da unser Gutsherr, Mr. Trelawney, Dr. Livesay und die übrigen Herren
mich baten, alle Einzelheiten über die Schatzinsel von Anfang bis zu
Ende aufzuschreiben und nichts auszulassen als die Lage der Insel, und
auch die nur, weil noch ungehobene Schätze dort liegen, nehme ich im
Jahre 17.. die Feder zur Hand und beginne bei der Zeit, als mein Vater
noch den Gasthof „Zum Admiral Benbow“ hielt und jener dunkle, alte
Seemann mit dem Säbelhieb über der Wange unter unserem Dache Wohnung
nahm.";

    /// Languages loaded by `LoadedDictionaries::new`; each name is also the
    /// prefix of its `<name>_book.txt` resource file.
    const LANGUAGES: [&'static str; 4] = ["english", "spanish", "french", "german"];
626
627 pub struct MicroDictionaries {
628 pub(crate) _languages: HashMap<String, Vec<String>>
629 }
630
631 impl MicroDictionaries {
632 pub fn new() -> Self {
633 let mut _languages: HashMap<String, Vec<String>> = HashMap::new();
634 _languages.insert("english".to_string(), vec!["yes".to_string(),
635 "no".to_string(),
636 "dog".to_string(),
637 "cat".to_string(),
638 "snake".to_string()]);
639 _languages.insert("spanish".to_string(), vec!["si".to_string(),
640 "no".to_string(),
641 "perro".to_string(),
642 "gato".to_string()]);
643 _languages.insert("french".to_string(), vec!["qui".to_string(),
644 "non".to_string(),
645 "chien".to_string(),
646 "chat".to_string()]);
647 _languages.insert("german".to_string(), vec!["ja".to_string(),
648 "nein".to_string(),
649 "hund".to_string(),
650 "katze".to_string()]);
651 MicroDictionaries{_languages}
652 }
653 }
654
655 pub struct LoadedDictionaries {
657 pub temp_dir: PathBuf,
658 pub languages: Vec<String>,
659 temp_env: TestEnvironment,
660 temp_env_var: TemporalEnvironmentVariable
661 }
662
663 impl LoadedDictionaries {
664 pub fn new()-> Self {
665 let (temp_env, temp_env_var) = temporary_database_folder(None);
666 database::create_database();
667 let temp_dir = temp_env.path().to_owned();
668 let mut resources_path = temp_dir.clone();
669 resources_path.push("resources");
670 create_dir(&resources_path);
671 let mut source_path = env::current_dir()
672 .expect("Could not get current working dir");
673 source_path.push("resources");
674 copy_files(LANGUAGES.iter()
675 .map(|x| format!("{}/{}_book.txt", source_path.to_str().expect("Path contains non unicode characters"), x))
676 .collect(),
677 resources_path.as_path().as_os_str().to_str()
678 .expect("Path contains not unicode characters."))
679 .expect("Error copying books to temporal folder.");
680 for _language in LANGUAGES.iter() {
681 let mut dictionary = Dictionary::new(_language, true)
682 .expect(format!("No dictionary found for {} language.", _language).as_str());
683 let mut language_book = resources_path.clone();
684 language_book.push(format!("{}_book.txt", _language));
685 dictionary.populate(language_book);
686 }
687 let mut _languages = Vec::new();
688 LANGUAGES.iter().map(|x| _languages.push(x.to_string())).collect::<Vec<_>>();
689 LoadedDictionaries{
690 temp_dir,
691 languages: _languages,
692 temp_env,
693 temp_env_var
694 }
695 }
696 }
697
    /// Fixture giving a test a freshly built `LoadedDictionaries`.
    #[fixture]
    pub fn full_loaded_temp_dictionaries()-> LoadedDictionaries {
        LoadedDictionaries::new()
    }
703
704 fn get_text_tuples()-> Vec<(&'static str, &'static str, &'static str)> {
706 vec![
707 ("english", ENGLISH_TEXT_WITH_PUNCTUATIONS_MARKS, ENGLISH_TEXT_WITHOUT_PUNCTUATIONS_MARKS),
708 ("spanish", SPANISH_TEXT_WITH_PUNCTUATIONS_MARKS, SPANISH_TEXT_WITHOUT_PUNCTUATIONS_MARKS),
709 ("french", FRENCH_TEXT_WITH_PUNCTUATIONS_MARKS, FRENCH_TEXT_WITHOUT_PUNCTUATIONS_MARKS),
710 ("german", GERMAN_TEXT_WITH_PUNCTUATIONS_MARKS, GERMAN_TEXT_WITHOUT_PUNCTUATIONS_MARKS)]
711 }
712
713 fn get_micro_dictionaries_content() -> HashMap<&'static str, Vec<String>>{
715 let mut micro_dictionaries: HashMap<&'static str, Vec<String>> = HashMap::new();
716 micro_dictionaries.insert("english", vec!("yes".to_string(), "no".to_string(), "dog".to_string(), "cat".to_string(), "snake".to_string()));
717 micro_dictionaries.insert("spanish", vec!("si".to_string(), "no".to_string(), "perro".to_string(), "gato".to_string()));
718 micro_dictionaries.insert("french", vec!("qui".to_string(), "non".to_string(), "chien".to_string(), "chat".to_string()));
719 micro_dictionaries.insert("german", vec!("ja".to_string(), "nein".to_string(), "hund".to_string(), "katze".to_string()));
720 micro_dictionaries
721 }
722
723
724 #[fixture]
729 pub fn loaded_micro_dictionary_temp_dir() -> (TestEnvironment, TemporalEnvironmentVariable) {
730 let (temp_env, temp_env_database_path) = temporary_database_folder(None);
731 database::create_database();
732 let micro_dictionaries= get_micro_dictionaries_content();
733 for (_language, _words) in µ_dictionaries {
735 let mut language_dictionary = Dictionary::new(_language, true)
736 .expect(format!("Dictionary not found for {} language", _language).as_str());
737 _words.iter().map(|_word| language_dictionary.add_word(_word)).collect::<Vec<_>>();
738 }
739 for (_language, _words) in micro_dictionaries {
740 let language_dictionary = Dictionary::new(_language, false)
741 .expect(format!("Dictionary not found for {} language", _language).as_str());
742 assert!(_words.iter().all(|_word| language_dictionary.word_exists(_word)));
743 }
744 (temp_env, temp_env_database_path)
745 }
746
747 struct TemporaryTextFile {
752 pub text_file: File,
753 pub normalized_text: String,
754 pub language_name: String,
755 pub temp_filename: PathBuf
756 }
757
758 impl TemporaryTextFile {
759 pub fn new<T, U, V, W>(temp_dir: T, text: U, normalized_text: V, language_name: W)-> Self
760 where T: AsRef<Path>,
761 U: AsRef<str>,
762 V: AsRef<str>,
763 W: AsRef<str> {
764 let mut temporary_text_file_pathname = PathBuf::from(temp_dir.as_ref().as_os_str());
765 temporary_text_file_pathname.push(TEXT_FILE_NAME);
766 let mut text_file = OpenOptions::new()
767 .write(true)
768 .create(true)
769 .truncate(true)
770 .open(&temporary_text_file_pathname)
771 .expect("Error opening temporary text file for writing into it.");
772 text_file.write_all(text.as_ref().as_bytes());
773 TemporaryTextFile {
774 text_file,
775 normalized_text: normalized_text.as_ref().to_string(),
776 language_name: language_name.as_ref().to_string(),
777 temp_filename: temporary_text_file_pathname
778 }
779 }
780 }
781
782 impl AsRef<Path> for TemporaryTextFile {
783 fn as_ref(&self) -> &Path {
784 self.temp_filename.as_path()
785 }
786 }
787
788
789 fn temporary_database_folder(temp_dir: Option<TestEnvironment>)-> (TestEnvironment, TemporalEnvironmentVariable){
795 let temp_dir = match temp_dir {
796 None => TestEnvironment::new(),
797 Some(test_env) => test_env
798 };
799 let mut temp_database_path = PathBuf::from(temp_dir.path());
800 temp_database_path.push("cifra_database.sqlite");
801 let temp_env_database_path = TemporalEnvironmentVariable::new(database::DATABASE_ENV_VAR,
802 temp_database_path.as_os_str().to_str()
803 .expect("Path contains non unicode chars."));
804 (temp_dir, temp_env_database_path)
805 }
806
807 #[test]
808 fn test_open_not_existing_dictionary() {
809 let (temp_dir, temp_env_database_path) = temporary_database_folder(None);
810 match Dictionary::new("english", false) {
811 Ok(_)=> assert!(false),
812 Err(_)=> assert!(true)
813 }
814 }
815
816 #[test]
817 fn test_open_existing_dictionary() {
818 let (temp_dir, temp_env_database_path) = temporary_database_folder(None);
819 database::create_database();
820 {
822 Dictionary::new("english", true);
823 }
824 {
826 let english_dictionary = Dictionary::new("english", false)
827 .expect("Error opening dictionary.");
828 assert!(english_dictionary.already_created());
829 }
830 }
831
832 #[test]
833 fn test_cwd_word() {
835 let (temp_dir, temp_env_database_path) = temporary_database_folder(None);
836 database::create_database();
837 let _word = "test";
838 let mut english_dictionary = Dictionary::new("english", true)
839 .expect("Error opening dictionary");
840 assert!(!english_dictionary.word_exists(_word));
841 english_dictionary.add_word(_word);
842 assert!(english_dictionary.word_exists(_word));
843 english_dictionary.remove_word(_word);
844 assert!(!english_dictionary.word_exists(_word));
845 }
846
847 #[test]
848 fn test_create_language() {
850 let (temp_dir, temp_env_database_path) = temporary_database_folder(None);
851 let mut english_dictionary = Dictionary {
852 language: "english".to_string(),
853 language_id: 0,
854 database: database::create_database().expect("Error creating database")
855 };
856 assert!(!english_dictionary.already_created());
857 english_dictionary.create_dictionary();
858 assert!(english_dictionary.already_created());
859 }
860
861 #[test]
862 fn test_delete_language() {
864 let mut micro_dictionaries = get_micro_dictionaries_content();
865 let (temp_dir, temp_env_database_path) = loaded_micro_dictionary_temp_dir();
866 let language_to_remove = "german";
867 Dictionary::remove_dictionary(language_to_remove);
868 let not_existing_dictionary = Dictionary {
870 language: language_to_remove.to_string(),
871 language_id: 0,
872 database: database::create_database().expect("Error creating database")
873 };
874 let micro_dictionary = micro_dictionaries.get(language_to_remove)
875 .expect("Error opening dictionary to be removed");
876 assert!(micro_dictionary.iter().all(|_word| !not_existing_dictionary.word_exists(_word)));
877 }
878
879 #[test]
880 fn test_get_words_from_text_file() {
881 let temp_dir = TestEnvironment::new();
882 let text_tuples = get_text_tuples();
883 for (language_name, text_with_puntuation_marks, text_without_punctuation_marks) in text_tuples {
884 let temporary_text = TemporaryTextFile::new(&temp_dir,
885 text_with_puntuation_marks,
886 text_without_punctuation_marks,
887 language_name);
888 let mut expected_set = HashSet::new();
889 temporary_text.normalized_text.to_lowercase().split_ascii_whitespace().map(|_word| expected_set.insert(_word.to_string())).collect::<Vec<_>>();
890 let returned_set = get_words_from_text_file(temporary_text.temp_filename)
891 .expect("Error reading text file");
892 let mut diff: Vec<String> = Vec::new();
893 for x in returned_set.symmetric_difference(&expected_set){
894 diff.push(x.clone());
895 }
896 assert_eq!(expected_set, returned_set);
897 }
898 }
899
900 #[test]
901 fn test_populate_words_from_text_files() {
902 let (temp_dir, temp_env_database_path) = temporary_database_folder(None);
903 database::create_database();
904 let temporary_text_file = TemporaryTextFile::new(&temp_dir,
905 ENGLISH_TEXT_WITH_PUNCTUATIONS_MARKS,
906 ENGLISH_TEXT_WITHOUT_PUNCTUATIONS_MARKS,
907 "english");
908 let mut expected_set: HashSet<&str> = HashSet::new();
909 let expected_file_content = temporary_text_file.normalized_text;
910 let expected_lowercase_content = expected_file_content.to_lowercase();
911 expected_lowercase_content.split_ascii_whitespace().map(|x| expected_set.insert(x)).collect::<Vec<_>>();
912 {
913 let mut dictionary = Dictionary::new(&temporary_text_file.language_name, true)
914 .expect("Error opening dictionary");
915 dictionary.populate(temporary_text_file.temp_filename.as_path());
916 }
917 {
918 let dictionary = Dictionary::new(&temporary_text_file.language_name, false)
919 .expect("Error opening dictionary");
920 assert!(expected_set.iter().all(|_word| dictionary.word_exists(_word)));
921 }
922 }
923
924 #[test]
925 fn test_get_words_from_text() {
926 let test_tuples = get_text_tuples();
927 for test_tuple in test_tuples {
928 let mut expected_set = HashSet::new();
929 test_tuple.2.to_lowercase().split_ascii_whitespace().map(|_word| expected_set.insert(_word.to_string())).collect::<Vec<_>>();
930 let returned_set = get_words_from_text(test_tuple.1);
931 assert_eq!(expected_set, returned_set);
932 }
933 }
934
935 #[test]
936 fn test_get_dictionaries_names() {
937 let loaded_dictionaries = LoadedDictionaries::new();
938 let dictionaries_names = Dictionary::get_dictionaries_names().expect("Error getting dictionaries names.");
939 assert_eq!(dictionaries_names, loaded_dictionaries.languages)
940 }
941
942 #[test]
943 fn test_add_multiple_words() {
944 let (temp_dir, temp_env_database_path) = temporary_database_folder(None);
945 database::create_database();
946 let _language = "english";
947 let micro_dictionaries = get_micro_dictionaries_content();
948 let mut words_to_add: HashSet<String> = HashSet::new();
949 micro_dictionaries[_language].iter().map(|_word| words_to_add.insert(_word.clone())).collect::<Vec<_>>();
950 let mut dictionary = Dictionary::new(_language, true)
951 .expect("Error opening dictionary.");
952 assert!(!micro_dictionaries[_language].iter().all(|_word| dictionary.word_exists(_word)));
953 dictionary.add_multiple_words(&words_to_add);
954 assert!(micro_dictionaries[_language].iter().all(|_word| dictionary.word_exists(_word)));
955 }
956
957 #[test]
958 fn test_identify_language() {
959 let loaded_dictionaries = LoadedDictionaries::new();
960 let test_cases = vec![(ENGLISH_TEXT_WITH_PUNCTUATIONS_MARKS, "english"),
961 (SPANISH_TEXT_WITH_PUNCTUATIONS_MARKS, "spanish")];
962 for (text, expected_language) in test_cases{
963 let identified_language = identify_language(text).expect("Error identifying language.");
964 if let Some(winner) = identified_language.winner {
965 assert_eq!(winner, expected_language, "Language not correctly identified.");
966 } else {
967 assert!(false, "Language not identified")
968 }
969 if let Some(winner_probability) = identified_language.winner_probability {
970 assert_eq!(winner_probability, 1.0, "Language probability incorrectly calculated.");
971 } else {
972 assert!(false, "Language probability not found.")
973 }
974 }
975 }
976
977 #[test]
978 fn test_get_word_pattern() {
979 let _word = "HGHHU";
980 let expected_word_pattern = "0.1.0.0.2";
981 let _word_pattern = get_word_pattern(_word);
982 assert_eq!(_word_pattern.as_str(), expected_word_pattern,
983 "Obtained pattern {} is not what we were waiting for {}.",
984 _word_pattern.as_str(), expected_word_pattern );
985 }
986
987 #[test]
988 fn test_store_word_pattern() {
989 let _word = "classification";
990 let (temp_dir, temp_env_database_path) = temporary_database_folder(None);
991 database::create_database();
992 if let Ok(mut test_dictionary) = Dictionary::new("test", true) {
993 assert!(!test_dictionary.word_exists(_word));
994 test_dictionary.add_word(_word);
995 assert!(test_dictionary.word_exists(_word));
996 let _words = test_dictionary.get_words_with_pattern("0.1.2.3.3.4.5.4.0.2.6.4.7.8").expect("No word found with that pattern");
997 assert!(_words.contains(&_word.to_string()));
998 } else {
999 assert!(false, "Could not create dictionary.")
1000 }
1001
1002 }
1003
1004 #[test]
1005 fn test_insertion_ordered_set() {
1006 let expected_list = vec!["A".to_string(), "B".to_string(), "C".to_string()];
1007 let mut set: InsertionOrderedSet<String> = InsertionOrderedSet::new();
1008 set.insert("A".to_string());
1009 set.insert("B".to_string());
1010 set.insert("C".to_string());
1011 set.insert("B".to_string());
1013 let recovered_list: Vec<String> = set.iter().cloned().collect();
1014 assert_eq!(recovered_list, expected_list,
1015 "Recovered list {:?} but we were expecting {:?}",
1016 recovered_list, expected_list);
1017 }
1018
1019 #[rstest]
1020 fn test_get_all_words(loaded_micro_dictionary_temp_dir: (TestEnvironment, TemporalEnvironmentVariable)) {
1021 let expected_words: HashSet<String> = HashSet::from_iter(vec!["yes".to_string(),
1022 "no".to_string(),
1023 "dog".to_string(),
1024 "cat".to_string(), "snake".to_string()]);
1025 let dictionary = Dictionary::new("english", false).unwrap();
1026 let returned_words = dictionary.get_all_words().unwrap();
1027 let returned_words_set = HashSet:: from_iter(returned_words);
1028 assert_eq!(returned_words_set, expected_words)
1029 }
1030}