use crate::checkers::checker_result::CheckResult;
use crate::storage;
use lemmeknow::Identifier;
use log::{debug, trace};
use crate::checkers::checker_type::{Check, Checker};
/// Marker type for the English-language checker.
///
/// It carries no data; it is used as the type parameter of `Checker` so that
/// `Checker<EnglishChecker>` can have its own `Check` implementation.
pub struct EnglishChecker;
/// English plaintext detection: a text is considered English when enough of
/// its space-separated words are present in the stored dictionaries.
impl Check for Checker<EnglishChecker> {
    /// Creates the checker together with its static metadata (name,
    /// description, reference link, tags and scoring hints).
    fn new() -> Self {
        Checker {
            name: "English Checker",
            description: "Checks for english words",
            link: "https://en.wikipedia.org/wiki/List_of_English_words",
            tags: vec!["english"],
            expected_runtime: 0.1,
            popularity: 1.0,
            lemmeknow_config: Identifier::default(),
            _phantom: std::marker::PhantomData,
        }
    }

    /// Decides whether `input` looks like English text.
    ///
    /// The input is first passed through `normalise_string`, then split on
    /// single space characters. Every piece is looked up in each dictionary of
    /// `storage::DICTIONARIES`. As soon as the share of pieces found is
    /// strictly greater than `PLAINTEXT_DETECTION_PERCENTAGE`, the result is
    /// marked as identified and the scan stops.
    ///
    /// The returned `CheckResult` always carries the original, un-normalised
    /// input in `text`. If the normalised input is empty, the result is
    /// returned unidentified without consulting the dictionaries.
    fn check(&self, input: &str) -> CheckResult {
        // Share of words (exclusive lower bound) that must be found in the
        // dictionaries before the text counts as English.
        const PLAINTEXT_DETECTION_PERCENTAGE: f64 = 0.4;

        let original_input = input;
        let input = normalise_string(input);
        trace!("Checking English for sentence {}", input);

        let mut result = CheckResult {
            is_identified: false,
            text: original_input.to_string(),
            checker_name: self.name,
            checker_description: self.description,
            description: "English text".to_string(),
            link: self.link,
        };

        if input.is_empty() {
            return result;
        }

        // The number of pieces never changes during the scan, so count once up
        // front instead of re-splitting the whole input on every iteration.
        // NOTE(review): splitting on ' ' yields an empty piece for each pair of
        // adjacent spaces, and those pieces are included in this total —
        // confirm that is the intended denominator.
        let total_words = input.split(' ').count() as f64;
        let mut words_found: f64 = 0.0;

        for word in input.split(' ') {
            if storage::DICTIONARIES
                .iter()
                .any(|(_, words)| words.contains(word))
            {
                trace!("Found word {} in English", word);
                words_found += 1.0;
            }

            trace!(
                "Checking word {} with words_found {} and input length: {}",
                word,
                words_found,
                input.len()
            );

            // `words_found` only grows, so once the threshold is crossed the
            // remaining words cannot change the verdict.
            if words_found / total_words > PLAINTEXT_DETECTION_PERCENTAGE {
                debug!("Found {} words in {}", words_found, original_input);
                debug!(
                    "Returning from English chekcer successfully with {}",
                    original_input
                );
                result.is_identified = true;
                break;
            }
        }

        result
    }
}
/// Prepares text for dictionary lookup.
///
/// Returns a copy of `input` with every ASCII punctuation character removed
/// and every ASCII uppercase letter converted to lowercase. All other
/// characters, including whitespace and non-ASCII characters, are kept as-is.
fn normalise_string(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    for ch in input.chars() {
        if ch.is_ascii_punctuation() {
            continue;
        }
        cleaned.push(ch.to_ascii_lowercase());
    }
    cleaned
}
#[cfg(test)]
mod tests {
    use crate::checkers::checker_type::{Check, Checker};
    use crate::checkers::english::{normalise_string, EnglishChecker};

    /// Runs a freshly built English checker over `text` and reports whether
    /// the text was identified.
    fn identified(text: &str) -> bool {
        let checker = Checker::<EnglishChecker>::new();
        checker.check(text).is_identified
    }

    // --- checker: inputs expected to be identified ---

    #[test]
    fn test_check_basic() {
        assert!(identified("preinterview"));
    }

    #[test]
    fn test_check_basic2() {
        assert!(identified("and"));
    }

    #[test]
    fn test_check_multiple_words() {
        assert!(identified("zzz zu'lkadah zenelophon"));
    }

    // --- checker: inputs expected to be rejected ---

    #[test]
    fn test_check_non_dictionary_word() {
        let outcome = identified("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBabyShark");
        assert!(!outcome);
    }

    #[test]
    fn test_check_multiple_words2() {
        assert!(identified("preinterview hello dog"));
    }

    // --- normalise_string ---

    #[test]
    fn test_check_normalise_string_works_with_lowercasing() {
        assert_eq!(normalise_string("Hello Dear"), "hello dear")
    }

    #[test]
    fn test_check_normalise_string_works_with_puncuation() {
        assert_eq!(normalise_string("Hello, Dear"), "hello dear")
    }

    #[test]
    fn test_check_normalise_string_works_with_messy_puncuation() {
        assert_eq!(normalise_string(".He/ll?O, Dea!r"), "hello dear")
    }

    // --- checker combined with normalisation ---

    #[test]
    fn test_checker_works_with_puncuation_and_lowercase() {
        assert!(identified("Prei?nterview He!llo Dog?"));
    }

    #[test]
    fn test_checker_fails_doesnt_hit_40_percent() {
        let outcome = identified("Hello Dog nnnnnnnnnnn llllllll ppppppppp gggggggg");
        assert!(!outcome);
    }

    #[test]
    fn test_check_fail_single_puncuation_char() {
        assert!(!identified("#"));
    }
}