1use crate::checkers::checker_result::CheckResult;
2use crate::storage;
3use lemmeknow::Identifier;
4use log::{debug, trace};
5
6use crate::checkers::checker_type::{Check, Checker};
7
/// Marker type used as the type parameter of `Checker` to select the
/// English-word implementation of the `Check` trait in this file.
pub struct EnglishChecker;
10
11impl Check for Checker<EnglishChecker> {
13 fn new() -> Self {
14 Checker {
15 name: "English Checker",
16 description: "Checks for english words",
17 link: "https://en.wikipedia.org/wiki/List_of_English_words",
18 tags: vec!["english"],
19 expected_runtime: 0.1,
20 popularity: 1.0,
22 lemmeknow_config: Identifier::default(),
23 _phantom: std::marker::PhantomData,
24 }
25 }
26
27 fn check(&self, input: &str) -> CheckResult {
28 let original_input = input;
29 let input = normalise_string(input);
31 trace!("Checking English for sentence {}", input);
32 const PLAINTEXT_DETECTION_PERCENTAGE: f64 = 0.4;
36 let mut words_found: f64 = 0.0;
37
38 let filename = "English text";
40
41 let mut result = CheckResult {
42 is_identified: false,
43 text: original_input.to_string(),
44 checker_name: self.name,
45 checker_description: self.description,
46 description: filename.to_string(),
47 link: self.link,
48 };
49
50 if input.is_empty() {
53 return result;
54 }
55
56 let split_input = input.split(' ');
57
58 for word in split_input {
60 if storage::DICTIONARIES
68 .iter()
69 .any(|(_, words)| words.contains(word))
70 {
71 trace!("Found word {} in English", word);
72 words_found += 1.0;
73 }
74
75 trace!(
76 "Checking word {} with words_found {} and input length: {}",
77 word,
78 words_found,
79 input.len()
80 );
81 if words_found / (input.split(' ').count()) as f64 > PLAINTEXT_DETECTION_PERCENTAGE {
83 debug!("Found {} words in {}", words_found, original_input);
84 debug!(
85 "Returning from English chekcer successfully with {}",
86 original_input
87 );
88 result.is_identified = true;
89 break;
90 }
91 }
92
93 result
94 }
95}
96
/// Produces the canonical form used for dictionary lookups: ASCII
/// punctuation is removed and ASCII letters are lowercased. Every other
/// character (spaces, digits, non-ASCII text) is kept unchanged.
fn normalise_string(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    for ch in input.chars() {
        if ch.is_ascii_punctuation() {
            continue;
        }
        cleaned.push(ch.to_ascii_lowercase());
    }
    cleaned
}
111
#[cfg(test)]
mod tests {
    use crate::checkers::english::normalise_string;
    use crate::checkers::{
        checker_type::{Check, Checker},
        english::EnglishChecker,
    };

    /// Runs a freshly constructed English checker over `text` and reports
    /// whether the text was identified.
    fn identified(text: &str) -> bool {
        Checker::<EnglishChecker>::new().check(text).is_identified
    }

    // A single dictionary word is identified.
    #[test]
    fn test_check_basic() {
        assert!(identified("preinterview"));
    }

    // A short, common word is identified.
    #[test]
    fn test_check_basic2() {
        assert!(identified("and"));
    }

    // Several words, one of them containing an apostrophe.
    #[test]
    fn test_check_multiple_words() {
        assert!(identified("zzz zu'lkadah zenelophon"));
    }

    // A long token that is not a dictionary word must not be identified.
    #[test]
    fn test_check_non_dictionary_word() {
        assert!(!identified(
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBabyShark"
        ));
    }

    // Several plain dictionary words.
    #[test]
    fn test_check_multiple_words2() {
        assert!(identified("preinterview hello dog"));
    }

    // Normalisation lowercases ASCII letters.
    #[test]
    fn test_check_normalise_string_works_with_lowercasing() {
        assert_eq!(normalise_string("Hello Dear"), "hello dear");
    }

    // Normalisation strips ASCII punctuation.
    #[test]
    fn test_check_normalise_string_works_with_puncuation() {
        assert_eq!(normalise_string("Hello, Dear"), "hello dear");
    }

    // Punctuation scattered inside words is stripped as well.
    #[test]
    fn test_check_normalise_string_works_with_messy_puncuation() {
        assert_eq!(normalise_string(".He/ll?O, Dea!r"), "hello dear");
    }

    // The checker normalises its input before the dictionary lookup.
    #[test]
    fn test_checker_works_with_puncuation_and_lowercase() {
        assert!(identified("Prei?nterview He!llo Dog?"));
    }

    // Two expected matches out of six tokens stays below the 40% threshold.
    #[test]
    fn test_checker_fails_doesnt_hit_40_percent() {
        assert!(!identified(
            "Hello Dog nnnnnnnnnnn llllllll ppppppppp gggggggg"
        ));
    }

    // Punctuation-only input normalises to an empty string and is rejected.
    #[test]
    fn test_check_fail_single_puncuation_char() {
        assert!(!identified("#"));
    }
}