use std::collections::HashSet;
use std::sync::LazyLock;
use crate::config::Profile;
use crate::parser::phrase_search::{find_word_bounded, line_column_at};
use crate::parser::Document;
use crate::rules::Rule;
use crate::types::{Diagnostic, Language, Location, Severity, SourceFile};
/// Identifies one of the built-in jargon word lists shipped with this rule.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JargonList {
/// Software-engineering vocabulary (e.g. "idempotent", "thread-safe").
Tech,
/// Legal vocabulary, mostly French and Latin expressions (e.g. "force majeure", "ad hoc").
Legal,
/// Medical vocabulary in French (e.g. "anamnèse", "nosocomial").
Medical,
/// Administrative vocabulary in French (e.g. "pièces justificatives").
Admin,
}
impl JargonList {
    /// Returns the built-in terms that belong to this list.
    ///
    /// The slice borrows from the lazily initialised static table of the
    /// selected variant, so it lives for the whole program.
    fn entries(self) -> &'static [&'static str] {
        match self {
            Self::Tech => TECH.as_slice(),
            Self::Legal => LEGAL.as_slice(),
            Self::Medical => MEDICAL.as_slice(),
            Self::Admin => ADMIN.as_slice(),
        }
    }
}
/// Software-engineering terms flagged when the `Tech` list is active.
///
/// All entries are written in lowercase; the table is built on first access.
static TECH: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
    Vec::from([
        "idempotent",
        "orthogonal",
        "deterministic",
        "polymorphic",
        "serialization",
        "deserialization",
        "synchronous",
        "asynchronous",
        "concurrency",
        "thread-safe",
        "side-effect",
        "referential transparency",
        "memoization",
        "currying",
        "hoisting",
        "closure",
        "monad",
        "immutable",
        "stateless",
        "refactoring",
    ])
});
/// Legal terms (French and Latin expressions) flagged when the `Legal` list
/// is active.
///
/// All entries are written in lowercase; the table is built on first access.
static LEGAL: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
    Vec::from([
        "apériteur",
        "clause résolutoire",
        "force majeure",
        "cessation de paiement",
        "préjudice subi",
        "onéreux",
        "nonobstant",
        "préalablement",
        "susmentionné",
        "infra",
        "supra",
        "ad hoc",
        "de facto",
        "in fine",
        "subséquemment",
    ])
});
/// French medical terms flagged when the `Medical` list is active.
///
/// All entries are written in lowercase; the table is built on first access.
static MEDICAL: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
    Vec::from([
        "anamnèse",
        "étiologie",
        "pathognomonique",
        "iatrogène",
        "nosocomial",
        "pronostic vital engagé",
        "décompensation",
        "récidive",
        "rémission",
        "syndromique",
    ])
});
/// French administrative terms flagged when the `Admin` list is active.
///
/// All entries are written in lowercase; the table is built on first access.
static ADMIN: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
    Vec::from([
        "attributaire",
        "solliciter",
        "diligenter",
        "instruction du dossier",
        "pièces justificatives",
        "circulaire",
        "délibération",
        "arrêté préfectoral",
        "transmission des pièces",
        "ayant droit",
    ])
});
/// Settings for the jargon rule.
///
/// The derived `Default` leaves every field empty, which means no term is
/// checked at all.
#[derive(Debug, Clone, Default)]
pub struct Config {
/// Built-in lists whose entries are searched for.
pub active_lists: Vec<JargonList>,
/// Additional terms to flag on top of the active built-in lists.
pub custom: Vec<String>,
/// Terms excluded from checking, even if they appear in an active list or in `custom`.
pub whitelist: Vec<String>,
}
impl Config {
    /// Builds the default configuration for `profile`.
    ///
    /// `Profile::DevDoc` activates no built-in list, so nothing is flagged.
    /// `Profile::Public` and `Profile::Falc` activate all four built-in lists.
    /// `custom` and `whitelist` start out empty in every case.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        let mut config = Self::default();
        // Kept as an exhaustive match so that a new profile forces a decision here.
        match profile {
            Profile::DevDoc => {}
            Profile::Public | Profile::Falc => config.active_lists.extend([
                JargonList::Tech,
                JargonList::Legal,
                JargonList::Medical,
                JargonList::Admin,
            ]),
        }
        config
    }
}
/// Lint rule that warns about jargon terms found in a document's paragraphs.
///
/// Which terms are searched for is determined entirely by the wrapped `Config`.
#[derive(Debug, Clone)]
pub struct JargonUndefined {
// Lists, custom terms and whitelist used to build the searched phrase set.
config: Config,
}
impl JargonUndefined {
    /// Creates the rule from an explicit configuration.
    #[must_use]
    pub const fn new(config: Config) -> Self {
        Self { config }
    }

    /// Creates the rule with the default configuration of `profile`.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        Self::new(Config::for_profile(profile))
    }

    /// Identifier of this rule, returned by `Rule::id` and attached to every
    /// diagnostic the rule emits.
    pub const ID: &'static str = "lexicon.jargon-undefined";

    /// Collects the terms to search for: the entries of every active built-in
    /// list followed by the custom terms, minus whitelisted terms.
    ///
    /// `check` searches a lowercased copy of each paragraph, so every term is
    /// trimmed and lowercased here; otherwise a custom term such as `"API"`
    /// could never match and a whitelist entry such as `"Idempotent"` would
    /// never silence anything. Empty terms are dropped, and a term that occurs
    /// several times (for example in a built-in list and in `custom`) is kept
    /// only once, in first-seen order, so one occurrence in the text yields
    /// one diagnostic.
    fn phrases(&self) -> Vec<String> {
        fn normalize(term: &str) -> String {
            term.trim().to_lowercase()
        }

        let whitelist: HashSet<String> = self
            .config
            .whitelist
            .iter()
            .map(|term| normalize(term))
            .collect();
        let mut seen = HashSet::new();
        self.config
            .active_lists
            .iter()
            .flat_map(|list| list.entries().iter().copied().map(normalize))
            .chain(self.config.custom.iter().map(|term| normalize(term)))
            .filter(|term| !term.is_empty() && !whitelist.contains(term))
            // `insert` returns false for a term that was already emitted.
            .filter(|term| seen.insert(term.clone()))
            .collect()
    }
}
impl Rule for JargonUndefined {
    /// Returns [`JargonUndefined::ID`].
    fn id(&self) -> &'static str {
        Self::ID
    }

    /// Emits one warning per occurrence of a configured term in `document`.
    ///
    /// Each paragraph is lowercased once and searched with
    /// `find_word_bounded` for every phrase, which makes the match
    /// case-insensitive with respect to the text. The language argument is
    /// ignored. The returned diagnostics are sorted by line, then column.
    fn check(&self, document: &Document, _language: Language) -> Vec<Diagnostic> {
        let phrases = self.phrases();
        if phrases.is_empty() {
            return Vec::new();
        }
        let mut diagnostics = Vec::new();
        for (paragraph, section_title) in document.paragraphs_with_section() {
            let lowered = paragraph.text.to_lowercase();
            // Lowercasing keeps every ASCII byte in place, so offsets into
            // `lowered` are only guaranteed to be valid offsets into the
            // original text when that text is pure ASCII.
            let offsets_align = paragraph.text.is_ascii();
            for phrase in &phrases {
                for byte_offset in find_word_bounded(&lowered, phrase) {
                    // Matches are found in `lowered`, but the position is
                    // resolved against the original text; translate first so
                    // the offset stays on a character boundary and points at
                    // the right character.
                    let source_offset = if offsets_align {
                        byte_offset
                    } else {
                        lowered_to_original_offset(&paragraph.text, byte_offset)
                    };
                    let (line_offset, column) = line_column_at(&paragraph.text, source_offset);
                    let line = paragraph.start_line.saturating_add(line_offset);
                    diagnostics.push(build_diagnostic(
                        &document.source,
                        line,
                        column,
                        phrase,
                        section_title,
                    ));
                }
            }
        }
        diagnostics.sort_by_key(|d| (d.location.line, d.location.column));
        diagnostics
    }
}

/// Maps a byte offset into `text.to_lowercase()` back to the byte offset, in
/// `text`, of the character that produced the byte at that position.
///
/// Lowercasing may change the encoded length of a character (for example
/// U+0130 grows from two to three bytes and U+212A shrinks from three bytes to
/// one), so the two strings cannot be assumed to share offsets. Offsets at or
/// past the end of the lowercased string map to `text.len()`.
fn lowered_to_original_offset(text: &str, lowered_offset: usize) -> usize {
    let mut lowered_end = 0_usize;
    for (original_start, ch) in text.char_indices() {
        lowered_end += ch.to_lowercase().map(char::len_utf8).sum::<usize>();
        if lowered_end > lowered_offset {
            return original_start;
        }
    }
    text.len()
}
/// Builds the warning reported for one occurrence of `term`.
///
/// The location starts at `line`/`column` in `source` and its length is the
/// number of `char`s in `term`, saturating at `u32::MAX`. When `section` is
/// present it is attached to the diagnostic through `with_section`.
fn build_diagnostic(
    source: &SourceFile,
    line: u32,
    column: u32,
    term: &str,
    section: Option<&str>,
) -> Diagnostic {
    let span = u32::try_from(term.chars().count()).unwrap_or(u32::MAX);
    let message = format!(
        "Jargon term \"{term}\" may be unfamiliar to non-specialists. Define it on first use or \
         replace with a simpler phrase."
    );
    let diagnostic = Diagnostic::new(
        JargonUndefined::ID,
        Severity::Warning,
        Location::new(source.clone(), line, column, span),
        message,
    );
    if let Some(title) = section {
        diagnostic.with_section(title)
    } else {
        diagnostic
    }
}
// Unit tests for the jargon rule: profile defaults, matching behaviour,
// whitelist/custom configuration and a serialized snapshot.
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::parse_plain;
use crate::types::SourceFile;
// Parses `text` as plain text from an anonymous source and runs the rule
// configured for `profile`, always passing `Language::En`.
fn lint(text: &str, profile: Profile) -> Vec<Diagnostic> {
let document = parse_plain(text, SourceFile::Anonymous);
JargonUndefined::for_profile(profile).check(&document, Language::En)
}
// Pins the public rule identifier.
#[test]
fn id_is_kebab_case() {
assert_eq!(JargonUndefined::ID, "lexicon.jargon-undefined");
}
// The DevDoc profile activates no list, so jargon goes unreported.
#[test]
fn dev_doc_does_not_flag_anything() {
let text = "The function is idempotent and thread-safe.";
assert!(lint(text, Profile::DevDoc).is_empty());
}
// The Public profile reports a term from the tech list.
#[test]
fn public_flags_tech_jargon() {
let diags = lint("The function is idempotent.", Profile::Public);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("idempotent"));
}
// A hyphenated entry is reported once, as a whole.
#[test]
fn hyphenated_term_matches_as_phrase() {
let diags = lint("Use a thread-safe queue here.", Profile::Public);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("thread-safe"));
}
// An entry containing a space is matched alongside a single-word entry.
#[test]
fn multi_word_term_matches() {
let diags = lint(
"Invoke memoization or referential transparency.",
Profile::Public,
);
assert_eq!(diags.len(), 2);
}
// Entries with accented characters and multiple words match in French text.
#[test]
fn french_legal_term_matches() {
let diags = lint(
"La force majeure a entraîné une cessation de paiement.",
Profile::Public,
);
assert_eq!(diags.len(), 2);
}
// Capitalised occurrences in the text still match the lowercase entries.
#[test]
fn case_insensitive_match() {
let diags = lint("Deterministic and Idempotent behaviour.", Profile::Public);
assert_eq!(diags.len(), 2);
}
// "monads" must not be reported for the entry "monad".
#[test]
fn word_boundary_prevents_partial_match() {
assert!(lint("He studies monads here.", Profile::Public).is_empty());
}
// A whitelisted term is skipped while other terms of the same list still fire.
#[test]
fn whitelist_silences_specific_term() {
let cfg = Config {
active_lists: vec![JargonList::Tech],
whitelist: vec!["idempotent".to_string()],
..Config::default()
};
let doc = parse_plain(
"The function is idempotent and deterministic.",
SourceFile::Anonymous,
);
let diags = JargonUndefined::new(cfg).check(&doc, Language::En);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("deterministic"));
}
// A custom term is searched in addition to the active built-in list.
#[test]
fn custom_term_is_added() {
let cfg = Config {
active_lists: vec![JargonList::Tech],
custom: vec!["yak shaving".to_string()],
..Config::default()
};
let doc = parse_plain("Too much yak shaving today.", SourceFile::Anonymous);
let diags = JargonUndefined::new(cfg).check(&doc, Language::En);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("yak shaving"));
}
// Diagnostics of this rule report the Lexicon category.
#[test]
fn category_is_lexicon() {
let diags = lint("A polymorphic wrapper.", Profile::Public);
assert_eq!(diags[0].category(), crate::types::Category::Lexicon);
}
// Snapshot of the serialized diagnostics; the file field is redacted to "<input>".
#[test]
fn snapshot_fixture() {
let text = "The wrapper is idempotent. It ensures thread-safe serialization of records.";
let diags = lint(text, Profile::Public);
insta::assert_yaml_snapshot!(diags, {
".*.location.file" => "<input>",
});
}
}