use std::num::NonZeroU32;
use unicode_segmentation::UnicodeSegmentation;
use crate::config::Profile;
use crate::parser::Document;
use crate::rules::Rule;
use crate::types::{Diagnostic, Language, Location, Severity, SourceFile};
/// Minimum number of characters a word needs before it is tested against the suffix lists;
/// shorter words are skipped and never counted.
const MIN_WORD_LEN: usize = 5;
/// Word endings used to detect nominalizations when the text language is English.
// NOTE(review): "al" probably also matches many adjectives ("local", "several") — confirm
// this is intended.
const EN_SUFFIXES: &[&str] = &[
"tion", "sion", "ment", "ance", "ence", "ity", "ism", "ness", "al",
];
/// Word endings used to detect nominalizations when the text language is French.
const FR_SUFFIXES: &[&str] = &[
"tion", "sion", "ment", "ance", "ence", "age", "ité", "isme", "ure",
];
/// Settings of the excessive-nominalization rule.
#[derive(Debug, Clone)]
pub struct Config {
/// Highest number of nominalizations a sentence may contain without being reported;
/// a sentence is flagged only when its count is strictly greater than this value.
pub max_per_sentence: NonZeroU32,
/// Suffixes that replace the built-in per-language lists when this vector is non-empty.
pub custom_suffixes: Vec<String>,
}
impl Config {
    /// Returns the default configuration for `profile`.
    ///
    /// The per-sentence limit is 4 for `DevDoc`, 3 for `Public` and 2 for `Falc`; no custom
    /// suffixes are set, so the built-in language lists apply.
    ///
    /// # Panics
    ///
    /// Only if a threshold literal below were changed to zero.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        let limit: u32 = match profile {
            Profile::Falc => 2,
            Profile::Public => 3,
            Profile::DevDoc => 4,
        };
        let max_per_sentence = NonZeroU32::new(limit).expect("non-zero literal");
        Self {
            max_per_sentence,
            custom_suffixes: Vec::new(),
        }
    }
}
/// Lint rule that reports sentences containing more nominalizations than its configuration
/// allows.
#[derive(Debug, Clone)]
pub struct ExcessiveNominalization {
// Threshold and optional custom suffix list used by `check`.
config: Config,
}
impl ExcessiveNominalization {
    /// Identifier under which this rule reports its diagnostics.
    pub const ID: &'static str = "lexicon.excessive-nominalization";

    /// Creates the rule from an explicit configuration.
    #[must_use]
    pub const fn new(config: Config) -> Self {
        Self { config }
    }

    /// Creates the rule with the default configuration of `profile`.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        let config = Config::for_profile(profile);
        Self::new(config)
    }
}
impl Rule for ExcessiveNominalization {
    /// Returns the identifier of this rule, [`ExcessiveNominalization::ID`].
    fn id(&self) -> &'static str {
        Self::ID
    }

    /// Reports every sentence of `document` whose nominalization count exceeds the configured
    /// `max_per_sentence`.
    ///
    /// One diagnostic is produced per offending sentence, located at the sentence's line and
    /// column and tagged with the title of the enclosing section when there is one. An empty
    /// vector is returned when no suffix list is available for `language`.
    fn check(&self, document: &Document, language: Language) -> Vec<Diagnostic> {
        let max = self.config.max_per_sentence.get();
        let suffixes = self.suffixes_for(language);
        // Without suffixes nothing can match, so skip walking the document entirely.
        if suffixes.is_empty() {
            return Vec::new();
        }
        let mut diagnostics = Vec::new();
        for (paragraph, section_title) in document.paragraphs_with_section() {
            // Iterate by reference: sentences are only read, never consumed.
            for sentence in &paragraph.sentences {
                let count = count_nominalizations(&sentence.text, &suffixes);
                // The limit is inclusive: exactly `max` nominalizations is still accepted.
                if count > max {
                    diagnostics.push(build_diagnostic(
                        &document.source,
                        sentence.line,
                        sentence.column,
                        &sentence.text,
                        count,
                        max,
                        section_title,
                    ));
                }
            }
        }
        diagnostics
    }
}
impl ExcessiveNominalization {
    /// Returns the suffixes used to detect nominalizations in text written in `language`.
    ///
    /// A non-empty `custom_suffixes` list in the configuration takes precedence over the
    /// built-in lists, whatever the language. Custom entries are lowercased because the words
    /// they are compared against are lowercased first; a suffix written with capitals would
    /// otherwise never match. `Language::Unknown` yields an empty list when no custom
    /// suffixes are configured.
    fn suffixes_for(&self, language: Language) -> Vec<String> {
        let custom = &self.config.custom_suffixes;
        if !custom.is_empty() {
            return custom.iter().map(|suffix| suffix.to_lowercase()).collect();
        }
        let builtin: &[&str] = match language {
            Language::En => EN_SUFFIXES,
            Language::Fr => FR_SUFFIXES,
            Language::Unknown => &[],
        };
        builtin.iter().map(|suffix| (*suffix).to_owned()).collect()
    }
}
fn count_nominalizations(sentence: &str, suffixes: &[String]) -> u32 {
let lowered = sentence.to_lowercase();
let mut count: u32 = 0;
for word in lowered.unicode_words() {
let char_len = word.chars().count();
if char_len < MIN_WORD_LEN {
continue;
}
if suffixes
.iter()
.any(|suffix| char_len > suffix.chars().count() && word.ends_with(suffix.as_str()))
{
count = count.saturating_add(1);
}
}
count
}
/// Builds the warning emitted for one sentence that exceeds the nominalization limit.
///
/// The location starts at `line`/`column` in `source` and spans the character count of
/// `sentence_text`, clamped to `u32::MAX`. `count` and `max` are embedded in the message.
/// When `section` is `Some`, its title is attached to the diagnostic.
// The parameters are passed individually by the single call site instead of via a struct.
#[allow(clippy::too_many_arguments)]
fn build_diagnostic(
    source: &SourceFile,
    line: u32,
    column: u32,
    sentence_text: &str,
    count: u32,
    max: u32,
    section: Option<&str>,
) -> Diagnostic {
    let span_chars = sentence_text.chars().count();
    let length = u32::try_from(span_chars).unwrap_or(u32::MAX);
    let message = format!(
        "Sentence has {count} nominalizations (max {max}). Consider rewriting with action verbs \
         and explicit agents."
    );
    let diagnostic = Diagnostic::new(
        ExcessiveNominalization::ID,
        Severity::Warning,
        Location::new(source.clone(), line, column, length),
        message,
    );
    if let Some(title) = section {
        diagnostic.with_section(title)
    } else {
        diagnostic
    }
}
// Unit tests for the excessive-nominalization rule: thresholds per profile, language
// handling, word-length and case rules, and the shape of the emitted diagnostics.
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::parse_plain;
use crate::types::SourceFile;
// Parses `text` as a plain anonymous document and runs the rule configured for `profile`
// against it, treating the text as written in `language`.
fn lint(text: &str, profile: Profile, language: Language) -> Vec<Diagnostic> {
let document = parse_plain(text, SourceFile::Anonymous);
ExcessiveNominalization::for_profile(profile).check(&document, language)
}
// Pins the public rule identifier.
#[test]
fn id_is_kebab_case() {
assert_eq!(
ExcessiveNominalization::ID,
"lexicon.excessive-nominalization"
);
}
// Sentences made of short verbs and nouns contain no suffix match at all.
#[test]
fn action_verbs_do_not_trigger() {
let text = "We ship code. We write tests. We deploy often.";
assert!(lint(text, Profile::Public, Language::En).is_empty());
}
// Five "-tion" words in one sentence exceed the Public limit of 3 and the count is
// reported in the message.
#[test]
fn dense_nominalization_triggers() {
let text = "The implementation of the configuration requires completion of the \
serialization and deserialization steps.";
let diags = lint(text, Profile::Public, Language::En);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("5 nominalizations"));
}
// Four matches equal the DevDoc limit (accepted) but exceed the Public limit (reported).
#[test]
fn dev_doc_profile_tolerates_more() {
let text = "The implementation of the configuration requires completion of the \
serialization step.";
assert!(lint(text, Profile::DevDoc, Language::En).is_empty());
assert!(!lint(text, Profile::Public, Language::En).is_empty());
}
// Three matches equal the Public limit (accepted) but exceed the Falc limit (reported).
#[test]
fn falc_profile_is_stricter() {
let text = "The implementation of the configuration requires attention.";
assert!(lint(text, Profile::Public, Language::En).is_empty());
assert!(!lint(text, Profile::Falc, Language::En).is_empty());
}
// The French suffix list, including the accented "ité", is applied to French text.
#[test]
fn french_suffixes_match_in_french() {
let text = "La réalisation de l'analyse de la conformité permettra l'identification \
des axes d'amélioration.";
let diags = lint(text, Profile::Public, Language::Fr);
assert_eq!(diags.len(), 1);
}
// Words shorter than `MIN_WORD_LEN` never count, even when they are themselves suffixes.
#[test]
fn short_words_are_ignored() {
let text = "It had age and ness and tion and sion and ence and ance.";
assert!(lint(text, Profile::Falc, Language::En).is_empty());
}
// Upper-case words match because the sentence is lowercased before comparison.
#[test]
fn case_insensitive_match() {
let text = "The IMPLEMENTATION and the COMPLETION and the CONFIGURATION and the \
NORMALIZATION overlap.";
let diags = lint(text, Profile::Public, Language::En);
assert_eq!(diags.len(), 1);
}
// Counts are kept per sentence: only the second, dense sentence is reported.
#[test]
fn per_sentence_scope() {
let text = "The completion is fine. The implementation of the configuration requires \
further normalization of the serialization and deserialization.";
let diags = lint(text, Profile::Public, Language::En);
assert_eq!(diags.len(), 1);
}
// With no suffix list for the language, the rule reports nothing.
#[test]
fn unknown_language_skips_rule() {
let text = "Implementation configuration completion normalization serialization.";
assert!(lint(text, Profile::Public, Language::Unknown).is_empty());
}
// A custom suffix list replaces the English defaults, so "-tion" words no longer count.
#[test]
fn custom_suffix_list_overrides_defaults() {
let cfg = Config {
max_per_sentence: NonZeroU32::new(1).expect("non-zero literal"),
custom_suffixes: vec!["xyz".to_string()],
};
let doc = parse_plain(
"The foo and the implementation and the configuration.",
SourceFile::Anonymous,
);
let diags = ExcessiveNominalization::new(cfg).check(&doc, Language::En);
assert!(diags.is_empty());
}
// Pins the per-profile thresholds (4 / 3 / 2).
#[test]
fn config_thresholds_match_rules_md() {
assert_eq!(
Config::for_profile(Profile::DevDoc).max_per_sentence.get(),
4
);
assert_eq!(
Config::for_profile(Profile::Public).max_per_sentence.get(),
3
);
assert_eq!(Config::for_profile(Profile::Falc).max_per_sentence.get(), 2);
}
// The emitted diagnostic is classified under the Lexicon category.
#[test]
fn category_is_lexicon() {
let text = "The implementation of the configuration requires completion of the \
serialization step here.";
let diags = lint(text, Profile::Public, Language::En);
assert_eq!(diags[0].category(), crate::types::Category::Lexicon);
}
// Snapshot of the full diagnostic output, with the file location redacted to "<input>".
#[test]
fn snapshot_fixture() {
let text = "The implementation of the configuration requires completion of the \
serialization and deserialization steps.";
let diags = lint(text, Profile::Public, Language::En);
insta::assert_yaml_snapshot!(diags, {
".*.location.file" => "<input>",
});
}
}