use crate::checkers::checker_result::CheckResult;
use gibberish_or_not::{is_gibberish, Sensitivity};
use lemmeknow::Identifier;
use crate::checkers::checker_type::{Check, Checker};
use crate::config::get_config;
/// Marker type selecting the English-text implementation of `Check` on
/// `Checker<EnglishChecker>`; it is a unit struct and carries no data.
pub struct EnglishChecker;
/// English plaintext detection built on the `gibberish-or-not` crate.
impl Check for Checker<EnglishChecker> {
    /// Builds the checker with its static metadata and `Sensitivity::Medium`
    /// as the initial sensitivity.
    fn new() -> Self {
        Checker {
            name: "English Checker",
            description: "Uses gibberish detection to check if text is meaningful English",
            link: "https://crates.io/crates/gibberish-or-not",
            tags: vec!["english", "nlp"],
            expected_runtime: 0.01,
            popularity: 1.0,
            lemmeknow_config: Identifier::default(),
            enhanced_detector: None,
            sensitivity: Sensitivity::Medium,
            _phantom: std::marker::PhantomData,
        }
    }

    /// Decides whether `text` looks like English.
    ///
    /// The input is first normalised (ASCII-lowercased, ASCII punctuation
    /// removed). Normalised text shorter than two bytes is never identified.
    /// Otherwise the text is identified when `is_gibberish` returns `false`.
    ///
    /// When the global config has `enhanced_detection` set, `Sensitivity::High`
    /// is used instead of the checker's own sensitivity.
    ///
    /// The returned `CheckResult` carries the *normalised* text, not the
    /// original input.
    fn check(&self, text: &str) -> CheckResult {
        let text = normalise_string(text);

        let sensitivity = if get_config().enhanced_detection {
            Sensitivity::High
        } else {
            self.sensitivity
        };

        // Check the length first so gibberish detection is skipped entirely
        // for inputs that are rejected regardless of its verdict.
        let is_identified = text.len() >= 2 && !is_gibberish(&text, sensitivity);

        CheckResult {
            is_identified,
            // `text` is already an owned String; move it rather than cloning.
            text,
            checker_name: self.name,
            checker_description: self.description,
            description: "Words".to_string(),
            link: self.link,
        }
    }

    /// Consumes the checker and returns it with `sensitivity` stored in place
    /// of the previous value.
    fn with_sensitivity(mut self, sensitivity: Sensitivity) -> Self {
        self.sensitivity = sensitivity;
        self
    }

    /// Returns the sensitivity currently stored on this checker.
    fn get_sensitivity(&self) -> Sensitivity {
        self.sensitivity
    }
}
/// Normalises text before gibberish detection.
///
/// Removes every ASCII punctuation character and lowercases ASCII letters.
/// Whitespace, digits and non-ASCII characters are passed through unchanged.
fn normalise_string(input: &str) -> String {
    // Single pass: filter and lowercase per character so no intermediate
    // lowercased String has to be allocated and re-scanned.
    input
        .chars()
        .filter(|c| !c.is_ascii_punctuation())
        .map(|c| c.to_ascii_lowercase())
        .collect()
}
#[cfg(test)]
mod tests {
    use crate::checkers::english::normalise_string;
    use crate::checkers::{
        checker_type::{Check, Checker},
        english::EnglishChecker,
    };
    use gibberish_or_not::Sensitivity;

    /// Runs a freshly constructed checker over `text` and reports whether it
    /// was identified.
    fn identified(text: &str) -> bool {
        Checker::<EnglishChecker>::new().check(text).is_identified
    }

    /// Like `identified`, but with the checker's sensitivity set explicitly.
    fn identified_with(text: &str, sensitivity: Sensitivity) -> bool {
        Checker::<EnglishChecker>::new()
            .with_sensitivity(sensitivity)
            .check(text)
            .is_identified
    }

    #[test]
    fn test_check_basic() {
        assert!(identified("preinterview"));
    }

    #[test]
    fn test_check_basic2() {
        assert!(identified("and"));
    }

    #[test]
    fn test_check_multiple_words() {
        assert!(identified("this is a valid english sentence"));
    }

    #[test]
    fn test_check_non_dictionary_word() {
        assert!(!identified(
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBabyShark"
        ));
    }

    #[test]
    fn test_check_multiple_words2() {
        assert!(identified("preinterview hello dog"));
    }

    #[test]
    fn test_check_normalise_string_works_with_lowercasing() {
        assert_eq!(normalise_string("Hello Dear"), "hello dear");
    }

    #[test]
    fn test_check_normalise_string_works_with_punctuation() {
        assert_eq!(normalise_string("Hello, Dear"), "hello dear");
    }

    #[test]
    fn test_check_normalise_string_works_with_messy_punctuation() {
        assert_eq!(normalise_string(".He/ll?O, Dea!r"), "hello dear");
    }

    #[test]
    fn test_checker_works_with_punctuation_and_lowercase() {
        assert!(identified("Prei?nterview He!llo Dog?"));
    }

    #[test]
    fn test_check_fail_single_punctuation_char() {
        // "#" normalises to the empty string, which is below the minimum length.
        assert!(!identified("#"));
    }

    #[test]
    fn test_default_sensitivity_is_medium() {
        let checker = Checker::<EnglishChecker>::new();
        assert!(matches!(checker.get_sensitivity(), Sensitivity::Medium));
    }

    #[test]
    fn test_with_sensitivity_changes_sensitivity() {
        let low = Checker::<EnglishChecker>::new().with_sensitivity(Sensitivity::Low);
        assert!(matches!(low.get_sensitivity(), Sensitivity::Low));

        let high = Checker::<EnglishChecker>::new().with_sensitivity(Sensitivity::High);
        assert!(matches!(high.get_sensitivity(), Sensitivity::High));
    }

    #[test]
    fn test_sensitivity_affects_gibberish_detection() {
        // The same input is expected to be rejected at Low and accepted at High.
        let text = "Rcl maocr otmwi lit dnoen oehc 13 iron seah.";
        assert!(!identified_with(text, Sensitivity::Low));
        assert!(identified_with(text, Sensitivity::High));
    }
}