use std::collections::HashSet;
use spellbook::Dictionary;
use crate::checker::Checker;
use crate::diagnostic::{Diagnostic, Severity};
use crate::po::entry::Entry;
use crate::po::format::iter::FormatWordPos;
use crate::po::format::language::Language;
use crate::po::message::Message;
use crate::rules::rule::RuleChecker;
/// Rule `spelling-ctxt`: spell-check the context (`msgctxt`) of an entry.
pub struct SpellingCtxtRule;

impl RuleChecker for SpellingCtxtRule {
    /// Identifier of the rule.
    fn name(&self) -> &'static str {
        "spelling-ctxt"
    }

    /// Always `false` for this rule.
    fn is_default(&self) -> bool {
        false
    }

    /// Always `true` for this rule.
    fn is_check(&self) -> bool {
        true
    }

    /// Diagnostics created by this rule have the `Info` severity.
    fn severity(&self) -> Severity {
        Severity::Info
    }

    /// Check the spelling of the words found in the context string.
    ///
    /// The dictionary stored in `checker.dict_id` is used; when it is not set,
    /// nothing is checked and no diagnostic is returned.
    ///
    /// Returns at most one diagnostic, which carries the set of misspelled
    /// words and highlights every occurrence of them in the context.
    fn check_ctxt(&self, checker: &Checker, entry: &Entry, msgctxt: &Message) -> Vec<Diagnostic> {
        let dict = match &checker.dict_id {
            Some(dict) => dict,
            None => return Vec::new(),
        };
        let (bad_words, highlights) = check_words(&msgctxt.value, &entry.format_language, dict);
        if bad_words.is_empty() {
            return Vec::new();
        }
        let diag = self
            .new_diag(checker, String::from("misspelled words in context"))
            .with_msg_hl(msgctxt, &highlights)
            .with_misspelled_words(bad_words);
        vec![diag]
    }
}
/// Rule `spelling-id`: spell-check the source string (`msgid`) of an entry.
pub struct SpellingIdRule;

impl RuleChecker for SpellingIdRule {
    /// Identifier of the rule.
    fn name(&self) -> &'static str {
        "spelling-id"
    }

    /// Always `false` for this rule.
    fn is_default(&self) -> bool {
        false
    }

    /// Always `true` for this rule.
    fn is_check(&self) -> bool {
        true
    }

    /// Diagnostics created by this rule have the `Info` severity.
    fn severity(&self) -> Severity {
        Severity::Info
    }

    /// Check the spelling of the words found in the source string.
    ///
    /// The dictionary stored in `checker.dict_id` is used; when it is not set,
    /// nothing is checked and no diagnostic is returned.
    ///
    /// Returns at most one diagnostic, which carries the set of misspelled
    /// words; highlights are set on the source only (none on the translation).
    fn check_msg(
        &self,
        checker: &Checker,
        entry: &Entry,
        msgid: &Message,
        msgstr: &Message,
    ) -> Vec<Diagnostic> {
        let dict = match &checker.dict_id {
            Some(dict) => dict,
            None => return Vec::new(),
        };
        let (bad_words, highlights) = check_words(&msgid.value, &entry.format_language, dict);
        if bad_words.is_empty() {
            return Vec::new();
        }
        let diag = self
            .new_diag(checker, String::from("misspelled words in source"))
            .with_msgs_hl(msgid, &highlights, msgstr, &[])
            .with_misspelled_words(bad_words);
        vec![diag]
    }
}
/// Rule `spelling-str`: spell-check the translated string (`msgstr`) of an entry.
pub struct SpellingStrRule;

impl RuleChecker for SpellingStrRule {
    /// Identifier of the rule.
    fn name(&self) -> &'static str {
        "spelling-str"
    }

    /// Always `false` for this rule.
    fn is_default(&self) -> bool {
        false
    }

    /// Always `true` for this rule.
    fn is_check(&self) -> bool {
        true
    }

    /// Diagnostics created by this rule have the `Info` severity.
    fn severity(&self) -> Severity {
        Severity::Info
    }

    /// Check the spelling of the words found in the translated string.
    ///
    /// The dictionary stored in `checker.dict_str` is used; when it is not set,
    /// nothing is checked and no diagnostic is returned.
    ///
    /// Returns at most one diagnostic, which carries the set of misspelled
    /// words; highlights are set on the translation only (none on the source).
    fn check_msg(
        &self,
        checker: &Checker,
        entry: &Entry,
        msgid: &Message,
        msgstr: &Message,
    ) -> Vec<Diagnostic> {
        let dict = match &checker.dict_str {
            Some(dict) => dict,
            None => return Vec::new(),
        };
        let (bad_words, highlights) = check_words(&msgstr.value, &entry.format_language, dict);
        if bad_words.is_empty() {
            return Vec::new();
        }
        let diag = self
            .new_diag(checker, String::from("misspelled words in translation"))
            .with_msgs_hl(msgid, &[], msgstr, &highlights)
            .with_misspelled_words(bad_words);
        vec![diag]
    }
}
/// Spell-check every word of `s` against `dict`.
///
/// Words are produced by `FormatWordPos` for the given `format_language`.
/// Two kinds of words are never checked:
/// - words containing at least one ASCII digit;
/// - words of two bytes or more made only of ASCII uppercase letters.
///
/// Each distinct word is looked up in the dictionary only once; the result is
/// reused for later occurrences of the same word.
///
/// Returns a tuple with:
/// - the set of distinct misspelled words (slices borrowed from `s`);
/// - the `(start, end)` position of every occurrence of a misspelled word,
///   in the order the words are yielded.
fn check_words<'s>(
    s: &'s str,
    format_language: &Language,
    dict: &Dictionary,
) -> (HashSet<&'s str>, Vec<(usize, usize)>) {
    let mut seen: HashSet<&str> = HashSet::new();
    let mut misspelled: HashSet<&str> = HashSet::new();
    let mut positions = Vec::new();
    for word in FormatWordPos::new(s, format_language) {
        let text = word.s;
        let has_digit = text.chars().any(|c| c.is_ascii_digit());
        let all_upper = text.len() >= 2 && text.chars().all(|c| c.is_ascii_uppercase());
        if has_digit || all_upper {
            continue;
        }
        // `insert` returns true only the first time a word is seen: this is
        // the only case where the dictionary has to be queried.
        let is_misspelled = if seen.insert(text) {
            let unknown = !dict.check(text);
            if unknown {
                misspelled.insert(text);
            }
            unknown
        } else {
            misspelled.contains(text)
        };
        if is_misspelled {
            positions.push((word.start, word.end));
        }
    }
    (misspelled, positions)
}
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;
    use crate::{config::Config, diagnostic::Diagnostic, rules::rule::Rules};

    /// Run the three spelling rules on the PO `content` and return the diagnostics.
    ///
    /// The dictionaries path is set to `resources/test` under the crate
    /// manifest directory.
    fn check_spelling(content: &str) -> Vec<Diagnostic> {
        let dict_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("resources")
            .join("test");
        let mut cfg = Config::default();
        cfg.check.path_dicts = dict_dir;
        let mut checker = Checker::new(content.as_bytes()).with_config(cfg);
        let spelling_rules = Rules::new(vec![
            Box::new(SpellingCtxtRule),
            Box::new(SpellingIdRule),
            Box::new(SpellingStrRule),
        ]);
        checker.do_all_checks(&spelling_rules);
        checker.diagnostics
    }

    /// No diagnostic is expected for this entry (it contains `HTTP` and `v3`).
    #[test]
    fn test_spelling_ok() {
        let diags = check_spelling(
            r#"
msgid ""
msgstr "Language: fr\n"
msgctxt "some context"
msgid "tested: HTTP v3"
msgstr "testé : HTTP v3"
"#,
        );
        assert!(diags.is_empty());
    }

    /// No diagnostic is expected when the entry has the `noqa` flag for the three rules.
    #[test]
    fn test_spelling_error_noqa() {
        let diags = check_spelling(
            r#"
msgid ""
msgstr "Language: fr\n"
#, noqa:spelling-ctxt;spelling-id;spelling-str
msgctxt "some contxet, some contxet"
msgid "this is a tyypo, this is a tyypo"
msgstr "ceci est unz fôte, ceci est unz fôte"
"#,
        );
        assert!(diags.is_empty());
    }

    /// One diagnostic per rule is expected, each misspelled word being listed once.
    #[test]
    fn test_spelling_error() {
        let diags = check_spelling(
            r#"
msgid ""
msgstr "Language: fr\n"
msgctxt "some contxet, some contxet"
msgid "this is a tyypo, this is a tyypo"
msgstr "ceci est unz fôte, ceci est unz fôte"
"#,
        );
        assert_eq!(diags.len(), 3);

        // Context.
        let ctxt_diag = &diags[0];
        assert_eq!(ctxt_diag.severity, Severity::Info);
        assert_eq!(
            ctxt_diag.build_message(),
            "misspelled words in context: contxet"
        );
        let expected_ctxt = HashSet::from(["contxet".to_string()]);
        assert_eq!(ctxt_diag.misspelled_words, expected_ctxt);

        // Source.
        let id_diag = &diags[1];
        assert_eq!(id_diag.severity, Severity::Info);
        assert_eq!(
            id_diag.build_message(),
            "misspelled words in source: tyypo"
        );
        let expected_id = HashSet::from(["tyypo".to_string()]);
        assert_eq!(id_diag.misspelled_words, expected_id);

        // Translation.
        let str_diag = &diags[2];
        assert_eq!(str_diag.severity, Severity::Info);
        assert_eq!(
            str_diag.build_message(),
            "misspelled words in translation: fôte, unz"
        );
        let expected_str = HashSet::from(["fôte".to_string(), "unz".to_string()]);
        assert_eq!(str_diag.misspelled_words, expected_str);
    }
}