use crate::expr::{Expr, ExprMap, FixedPhrase};
use hashbrown::HashMap;
use serde::{Deserialize, Serialize};
use super::{ExprLinter, LintGroup};
use super::{Lint, LintKind, Suggestion};
use crate::Document;
use crate::linting::expr_linter::Chunk;
use crate::parsers::PlainEnglish;
use crate::spell::Dictionary;
use crate::{Token, TokenStringExt};
use std::sync::Arc;
/// A linter that flags occurrences of known proper-noun phrases whose
/// characters differ from the canonical spelling, and suggests replacing them
/// with that canonical form.
pub struct ProperNounCapitalizationLinter<D: Dictionary + 'static> {
/// Maps the expression built from each canonical phrase to the tokenized
/// document holding that phrase's canonical spelling.
pattern_map: ExprMap<Document>,
/// Text returned by `description()` and used as the message of every lint
/// this linter emits.
description: String,
/// Dictionary supplied at construction time.
/// NOTE(review): not read by any code visible in this file.
dictionary: Arc<D>,
}
impl<D: Dictionary + 'static> ProperNounCapitalizationLinter<D> {
    /// Builds a linter from the canonical (correctly capitalized) spellings of
    /// one or more proper nouns.
    ///
    /// * `canonical_versions` - each entry is tokenized with the
    ///   [`PlainEnglish`] parser and turned into a [`FixedPhrase`] expression;
    ///   the tokenized document is kept as that expression's canonical form.
    /// * `description` - used both as the linter description and as the
    ///   message of every emitted lint.
    /// * `dictionary` - wrapped in an [`Arc`] and stored on the linter.
    pub fn new_strs(
        canonical_versions: impl IntoIterator<Item = impl AsRef<str>>,
        description: impl ToString,
        dictionary: D,
    ) -> Self {
        let mut pattern_map = ExprMap::default();

        for canonical in canonical_versions {
            let canonical_doc = Document::new_basic_tokenize(canonical.as_ref(), &PlainEnglish);
            let phrase = FixedPhrase::from_document(&canonical_doc);
            pattern_map.insert(phrase, canonical_doc);
        }

        Self {
            pattern_map,
            // The freshly created `Arc` is moved straight into the struct;
            // cloning it first would only bump and drop a reference count.
            dictionary: Arc::new(dictionary),
            description: description.to_string(),
        }
    }
}
impl<D: Dictionary + 'static> ExprLinter for ProperNounCapitalizationLinter<D> {
    type Unit = Chunk;

    /// The expression to search for: the map containing every canonical phrase.
    fn expr(&self) -> &dyn Expr {
        &self.pattern_map
    }

    /// Converts a match of one of the phrases into a capitalization lint.
    ///
    /// Returns `None` when:
    /// * the matched tokens cannot be resolved to a canonical document,
    /// * every matched token already has exactly the canonical characters, or
    /// * the matched tokens have no span.
    ///
    /// Otherwise the lint covers the whole match and suggests replacing it with
    /// the canonical document's source text.
    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
        // A failed lookup means there is nothing to compare against. Report no
        // lint instead of panicking inside the caller's lint pass.
        let canonical_case = self.pattern_map.lookup(0, matched_tokens, source)?;

        // Compare the matched text with the canonical text token by token; a
        // single differing token is enough to flag the phrase.
        let differs = matched_tokens
            .iter()
            .zip(canonical_case.fat_tokens())
            .any(|(err_token, correct_token)| err_token.get_ch(source) != correct_token.content);

        if !differs {
            return None;
        }

        Some(Lint {
            span: matched_tokens.span()?,
            lint_kind: LintKind::Capitalization,
            suggestions: vec![Suggestion::ReplaceWith(
                canonical_case.get_source().to_vec(),
            )],
            message: self.description.clone(),
            priority: 31,
        })
    }

    /// Human-readable description supplied when the linter was constructed.
    fn description(&self) -> &str {
        self.description.as_str()
    }
}
/// One rule as deserialized from the JSON rule map: the data needed to build a
/// single `ProperNounCapitalizationLinter`.
#[derive(Serialize, Deserialize)]
struct RuleEntry {
/// Canonical spellings of the proper noun(s) covered by this rule; passed to
/// the linter constructor as its canonical versions.
canonical: Vec<String>,
/// Description passed to the linter constructor.
description: String,
}
/// Builds a [`LintGroup`] containing one [`ProperNounCapitalizationLinter`] per
/// entry of a JSON object that maps rule names to [`RuleEntry`] values.
///
/// Each linter is registered under its JSON key, and every rule in the group is
/// then set to `Some(true)`.
///
/// # Panics
///
/// Panics if `json` does not deserialize into a map of rule entries.
fn lint_group_from_json(json: &str, dictionary: Arc<impl Dictionary + 'static>) -> LintGroup {
    let rules: HashMap<String, RuleEntry> = serde_json::from_str(json)
        .expect("proper noun rules JSON must be a map of rule names to rule entries");

    let mut group = LintGroup::empty();

    for (key, rule) in rules {
        let linter = ProperNounCapitalizationLinter::new_strs(
            rule.canonical,
            rule.description,
            // Cheap reference-count bump; every linter shares one dictionary.
            Arc::clone(&dictionary),
        );
        group.add_chunk_expr_linter(key, Box::new(linter));
    }

    group.set_all_rules_to(Some(true));
    group
}
/// Produces the proper-noun capitalization lint group from the rule file
/// `proper_noun_rules.json`, which is embedded into the binary at compile time.
pub fn lint_group(dictionary: Arc<impl Dictionary + 'static>) -> LintGroup {
    let embedded_rules = include_str!("../../proper_noun_rules.json");
    lint_group_from_json(embedded_rules, dictionary)
}
#[cfg(test)]
mod tests {
    //! Checks that the embedded proper-noun rules correct miscapitalized
    //! phrases and leave correctly capitalized ones alone.

    use super::lint_group;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
    use crate::spell::FstDictionary;

    // --- Continents -------------------------------------------------------

    #[test]
    fn americas_lowercase() {
        assert_suggestion_result(
            "south america",
            lint_group(FstDictionary::curated()),
            "South America",
        );
        assert_suggestion_result(
            "north america",
            lint_group(FstDictionary::curated()),
            "North America",
        );
    }

    #[test]
    fn americas_uppercase() {
        assert_suggestion_result(
            "SOUTH AMERICA",
            lint_group(FstDictionary::curated()),
            "South America",
        );
        assert_suggestion_result(
            "NORTH AMERICA",
            lint_group(FstDictionary::curated()),
            "North America",
        );
    }

    #[test]
    fn americas_allow_correct() {
        assert_lint_count("South America", lint_group(FstDictionary::curated()), 0);
        assert_lint_count("North America", lint_group(FstDictionary::curated()), 0);
    }

    // --- Countries and organizations --------------------------------------

    #[test]
    fn issue_798() {
        assert_suggestion_result(
            "The United states is a big country.",
            lint_group(FstDictionary::curated()),
            "The United States is a big country.",
        );
    }

    #[test]
    fn united_nations_uppercase() {
        assert_suggestion_result(
            "UNITED NATIONS",
            lint_group(FstDictionary::curated()),
            "United Nations",
        );
    }

    // The input is all-uppercase, so the test is named `_uppercase` like its
    // siblings.
    #[test]
    fn united_arab_emirates_uppercase() {
        assert_suggestion_result(
            "UNITED ARAB EMIRATES",
            lint_group(FstDictionary::curated()),
            "United Arab Emirates",
        );
    }

    #[test]
    fn united_nations_allow_correct() {
        assert_lint_count("United Nations", lint_group(FstDictionary::curated()), 0);
    }

    // --- Brands and products ----------------------------------------------

    #[test]
    fn meta_allow_correct() {
        assert_lint_count("Meta Quest", lint_group(FstDictionary::curated()), 0);
    }

    #[test]
    fn microsoft_lowercase() {
        assert_suggestion_result(
            "microsoft visual studio",
            lint_group(FstDictionary::curated()),
            "Microsoft Visual Studio",
        );
    }

    #[test]
    fn microsoft_first_word_is_correct() {
        assert_suggestion_result(
            "Microsoft visual studio",
            lint_group(FstDictionary::curated()),
            "Microsoft Visual Studio",
        );
    }

    // --- Oceans and seas: miscapitalized ----------------------------------

    #[test]
    fn test_atlantic_ocean_lowercase() {
        let dictionary = FstDictionary::curated();
        assert_suggestion_result("atlantic ocean", lint_group(dictionary), "Atlantic Ocean");
    }

    #[test]
    fn test_pacific_ocean_lowercase() {
        let dictionary = FstDictionary::curated();
        assert_suggestion_result("pacific ocean", lint_group(dictionary), "Pacific Ocean");
    }

    #[test]
    fn test_indian_ocean_lowercase() {
        let dictionary = FstDictionary::curated();
        assert_suggestion_result("indian ocean", lint_group(dictionary), "Indian Ocean");
    }

    #[test]
    fn test_southern_ocean_lowercase() {
        let dictionary = FstDictionary::curated();
        assert_suggestion_result("southern ocean", lint_group(dictionary), "Southern Ocean");
    }

    #[test]
    fn test_arctic_ocean_lowercase() {
        let dictionary = FstDictionary::curated();
        assert_suggestion_result("arctic ocean", lint_group(dictionary), "Arctic Ocean");
    }

    #[test]
    fn test_mediterranean_sea_lowercase() {
        let dictionary = FstDictionary::curated();
        assert_suggestion_result(
            "mediterranean sea",
            lint_group(dictionary),
            "Mediterranean Sea",
        );
    }

    #[test]
    fn test_caribbean_sea_lowercase() {
        let dictionary = FstDictionary::curated();
        assert_suggestion_result("caribbean sea", lint_group(dictionary), "Caribbean Sea");
    }

    #[test]
    fn test_south_china_sea_lowercase() {
        let dictionary = FstDictionary::curated();
        assert_suggestion_result("south china sea", lint_group(dictionary), "South China Sea");
    }

    // --- Oceans and seas: already correct ---------------------------------

    #[test]
    fn test_atlantic_ocean_correct() {
        let dictionary = FstDictionary::curated();
        assert_lint_count("Atlantic Ocean", lint_group(dictionary), 0);
    }

    #[test]
    fn test_pacific_ocean_correct() {
        let dictionary = FstDictionary::curated();
        assert_lint_count("Pacific Ocean", lint_group(dictionary), 0);
    }

    #[test]
    fn test_indian_ocean_correct() {
        let dictionary = FstDictionary::curated();
        assert_lint_count("Indian Ocean", lint_group(dictionary), 0);
    }

    #[test]
    fn test_mediterranean_sea_correct() {
        let dictionary = FstDictionary::curated();
        assert_lint_count("Mediterranean Sea", lint_group(dictionary), 0);
    }

    #[test]
    fn test_south_china_sea_correct() {
        let dictionary = FstDictionary::curated();
        assert_lint_count("South China Sea", lint_group(dictionary), 0);
    }

    // --- Phrases embedded in longer sentences -----------------------------

    #[test]
    fn day_one_in_sentence() {
        assert_suggestion_result(
            "I love day one. It is the best journaling app.",
            lint_group(FstDictionary::curated()),
            "I love Day One. It is the best journaling app.",
        );
    }

    #[test]
    fn gilded_age_in_sentence() {
        assert_suggestion_result(
            "Mani-Chess Destiny is a JavaScript based computer game built off of chess, but in the style of the gilded age.",
            lint_group(FstDictionary::curated()),
            "Mani-Chess Destiny is a JavaScript based computer game built off of chess, but in the style of the Gilded Age.",
        );
    }
}