use crate::expr::{Expr, ExprMap, FixedPhrase};
use hashbrown::HashMap;
use serde::{Deserialize, Serialize};
use super::{ExprLinter, LintGroup};
use super::{Lint, LintKind, Suggestion};
use crate::Document;
use crate::linting::expr_linter::Chunk;
use crate::parsers::PlainEnglish;
use crate::{Token, TokenStringExt};
pub struct ProperNounCapitalizationLinter {
pattern_map: ExprMap<Document>,
description: String,
}
impl ProperNounCapitalizationLinter {
pub fn new_strs(
canonical_versions: impl IntoIterator<Item = impl AsRef<str>>,
description: impl ToString,
) -> Self {
let mut expr_map = ExprMap::default();
for can_vers in canonical_versions {
let doc = Document::new_basic_tokenize(can_vers.as_ref(), &PlainEnglish);
let expr = FixedPhrase::from_document(&doc);
expr_map.insert(expr, doc);
}
Self {
pattern_map: expr_map,
description: description.to_string(),
}
}
}
impl ExprLinter for ProperNounCapitalizationLinter {
type Unit = Chunk;
fn expr(&self) -> &dyn Expr {
&self.pattern_map
}
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
let canonical_case = self.pattern_map.lookup(0, matched_tokens, source).unwrap();
let mut broken = false;
for (err_token, correct_token) in matched_tokens.iter().zip(canonical_case.fat_tokens()) {
let err_chars = err_token.get_ch(source);
if err_chars != correct_token.content {
broken = true;
break;
}
}
if !broken {
return None;
}
Some(Lint {
span: matched_tokens.span()?,
lint_kind: LintKind::Capitalization,
suggestions: vec![Suggestion::ReplaceWith(
canonical_case.get_source().to_vec(),
)],
message: self.description.to_string(),
priority: 31,
})
}
fn description(&self) -> &str {
self.description.as_str()
}
}
#[derive(Serialize, Deserialize)]
struct RuleEntry {
canonical: Vec<String>,
description: String,
}
fn lint_group_from_json(json: &str) -> LintGroup {
let mut group = LintGroup::empty();
let rules: HashMap<String, RuleEntry> = serde_json::from_str(json).unwrap();
for (key, rule) in rules.into_iter() {
group.add_chunk_expr_linter(
key,
Box::new(ProperNounCapitalizationLinter::new_strs(
rule.canonical,
rule.description,
)),
);
}
group.set_all_rules_to(Some(true));
group
}
pub fn lint_group() -> LintGroup {
lint_group_from_json(include_str!("../../proper_noun_rules.json"))
}
#[cfg(test)]
mod tests {
use super::lint_group;
use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
#[test]
fn americas_lowercase() {
assert_suggestion_result("south america", lint_group(), "South America");
assert_suggestion_result("north america", lint_group(), "North America");
}
#[test]
fn americas_uppercase() {
assert_suggestion_result("SOUTH AMERICA", lint_group(), "South America");
assert_suggestion_result("NORTH AMERICA", lint_group(), "North America");
}
#[test]
fn americas_allow_correct() {
assert_lint_count("South America", lint_group(), 0);
assert_lint_count("North America", lint_group(), 0);
}
#[test]
fn issue_798() {
assert_suggestion_result(
"The United states is a big country.",
lint_group(),
"The United States is a big country.",
);
}
#[test]
fn united_nations_uppercase() {
assert_suggestion_result("UNITED NATIONS", lint_group(), "United Nations");
}
#[test]
fn united_arab_emirates_lowercase() {
assert_suggestion_result("UNITED ARAB EMIRATES", lint_group(), "United Arab Emirates");
}
#[test]
fn united_nations_allow_correct() {
assert_lint_count("United Nations", lint_group(), 0);
}
#[test]
fn meta_allow_correct() {
assert_lint_count("Meta Quest", lint_group(), 0);
}
#[test]
fn microsoft_lowercase() {
assert_suggestion_result(
"microsoft visual studio",
lint_group(),
"Microsoft Visual Studio",
);
}
#[test]
fn microsoft_first_word_is_correct() {
assert_suggestion_result(
"Microsoft visual studio",
lint_group(),
"Microsoft Visual Studio",
);
}
#[test]
fn test_atlantic_ocean_lowercase() {
assert_suggestion_result("atlantic ocean", lint_group(), "Atlantic Ocean");
}
#[test]
fn test_pacific_ocean_lowercase() {
assert_suggestion_result("pacific ocean", lint_group(), "Pacific Ocean");
}
#[test]
fn test_indian_ocean_lowercase() {
assert_suggestion_result("indian ocean", lint_group(), "Indian Ocean");
}
#[test]
fn test_southern_ocean_lowercase() {
assert_suggestion_result("southern ocean", lint_group(), "Southern Ocean");
}
#[test]
fn test_arctic_ocean_lowercase() {
assert_suggestion_result("arctic ocean", lint_group(), "Arctic Ocean");
}
#[test]
fn test_mediterranean_sea_lowercase() {
assert_suggestion_result("mediterranean sea", lint_group(), "Mediterranean Sea");
}
#[test]
fn test_caribbean_sea_lowercase() {
assert_suggestion_result("caribbean sea", lint_group(), "Caribbean Sea");
}
#[test]
fn test_south_china_sea_lowercase() {
assert_suggestion_result("south china sea", lint_group(), "South China Sea");
}
#[test]
fn test_atlantic_ocean_correct() {
assert_lint_count("Atlantic Ocean", lint_group(), 0);
}
#[test]
fn test_pacific_ocean_correct() {
assert_lint_count("Pacific Ocean", lint_group(), 0);
}
#[test]
fn test_indian_ocean_correct() {
assert_lint_count("Indian Ocean", lint_group(), 0);
}
#[test]
fn test_mediterranean_sea_correct() {
assert_lint_count("Mediterranean Sea", lint_group(), 0);
}
#[test]
fn test_south_china_sea_correct() {
assert_lint_count("South China Sea", lint_group(), 0);
}
#[test]
fn day_one_in_sentence() {
assert_suggestion_result(
"I love day one. It is the best journaling app.",
lint_group(),
"I love Day One. It is the best journaling app.",
);
}
#[test]
fn gilded_age_in_sentence() {
assert_suggestion_result(
"Mani-Chess Destiny is a JavaScript based computer game built off of chess, but in the style of the gilded age.",
lint_group(),
"Mani-Chess Destiny is a JavaScript based computer game built off of chess, but in the style of the Gilded Age.",
);
}
#[test]
fn chrome_extension_lowercase() {
assert_suggestion_result("chrome extension", lint_group(), "Chrome Extension");
}
#[test]
fn chrome_extension_uppercase() {
assert_suggestion_result("CHROME EXTENSION", lint_group(), "Chrome Extension");
}
#[test]
fn chrome_extension_mixed_case() {
assert_suggestion_result("cHrOmE eXtEnSiOn", lint_group(), "Chrome Extension");
}
#[test]
fn chrome_extension_second_word_lowercase() {
assert_suggestion_result("Chrome extension", lint_group(), "Chrome Extension");
}
#[test]
fn chrome_extension_first_word_lowercase() {
assert_suggestion_result("chrome Extension", lint_group(), "Chrome Extension");
}
#[test]
fn chrome_extension_in_sentence() {
assert_suggestion_result(
"Install the chrome extension from the store.",
lint_group(),
"Install the Chrome Extension from the store.",
);
}
#[test]
fn chrome_extension_with_leading_article() {
assert_suggestion_result(
"The chrome extension is ready.",
lint_group(),
"The Chrome Extension is ready.",
);
}
#[test]
fn chrome_extension_with_trailing_period() {
assert_suggestion_result(
"We shipped the chrome extension.",
lint_group(),
"We shipped the Chrome Extension.",
);
}
#[test]
fn chrome_extension_with_trailing_comma() {
assert_suggestion_result(
"The chrome extension, not the app, needs review.",
lint_group(),
"The Chrome Extension, not the app, needs review.",
);
}
#[test]
fn chrome_extension_with_trailing_colon() {
assert_suggestion_result(
"Preferred install target: chrome extension",
lint_group(),
"Preferred install target: Chrome Extension",
);
}
#[test]
fn chrome_extension_inside_quotes() {
assert_suggestion_result(
"They called it the \"chrome extension\" build.",
lint_group(),
"They called it the \"Chrome Extension\" build.",
);
}
#[test]
fn chrome_extension_across_sentence_boundary_not_present() {
assert_lint_count("Chrome. Extension", lint_group(), 0);
}
#[test]
fn chrome_extension_allows_correct_case() {
assert_lint_count("Chrome Extension", lint_group(), 0);
}
#[test]
fn chrome_extension_allows_correct_case_in_sentence() {
assert_lint_count("The Chrome Extension is ready.", lint_group(), 0);
}
#[test]
fn browser_extension_not_flagged() {
assert_lint_count("browser extension", lint_group(), 0);
}
}