use std::collections::{BTreeMap, HashSet};
use crate::explain::DOCS_BASE;
use crate::types::Diagnostic;
/// Upper bound on the number of hint lines returned by `hints`.
pub(super) const MAX_HINTS: usize = 5;
/// Minimum number of hits an acronym token needs before a whitelist hint is
/// suggested for it.
pub(super) const ACRONYM_HINT_THRESHOLD: usize = 3;
/// Minimum number of hits a (file, rule) pair needs before it is reported as
/// a hotspot.
pub(super) const HOTSPOT_HINT_THRESHOLD: usize = 10;
/// Maximum number of acronym whitelist hints emitted per run.
pub(super) const ACRONYM_HINT_TOP_N: usize = 3;
/// Rule id whose diagnostics feed the acronym whitelist hints; also used as
/// the config table name in the suggested whitelist entry.
const UNEXPLAINED_ABBREVIATION: &str = "lexicon.unexplained-abbreviation";
/// Builds the hint lines for a batch of diagnostics.
///
/// Acronym whitelist hints are listed first, followed by hotspot hints for
/// (file, rule) pairs that the acronym hints did not already cover. The
/// combined list is capped at [`MAX_HINTS`] entries.
pub(super) fn hints(diagnostics: &[Diagnostic]) -> Vec<String> {
    let (acronym_lines, covered_pairs) = acronym_hints(diagnostics);
    let hotspot_lines = hotspot_hints(diagnostics, &covered_pairs);
    // Acronym hints take priority: hotspot hints only fill whatever room is
    // left under the overall cap.
    acronym_lines
        .into_iter()
        .chain(hotspot_lines)
        .take(MAX_HINTS)
        .collect()
}
/// Produces whitelist suggestions for frequently flagged acronyms.
///
/// Only diagnostics of the [`UNEXPLAINED_ABBREVIATION`] rule whose message
/// yields a quoted token are considered. Tokens with at least
/// [`ACRONYM_HINT_THRESHOLD`] hits are ranked by hit count (descending), ties
/// broken by token (ascending), and the first [`ACRONYM_HINT_TOP_N`] each get
/// one hint line.
///
/// Returns the hint lines together with the set of `(file, rule id)` pairs
/// for every file in which a hinted token was flagged.
fn acronym_hints(diagnostics: &[Diagnostic]) -> (Vec<String>, HashSet<(String, String)>) {
    // token -> (number of hits, files in which the token was flagged)
    let mut per_token: BTreeMap<String, (usize, HashSet<String>)> = BTreeMap::new();
    let flagged = diagnostics
        .iter()
        .filter(|d| d.rule_id == UNEXPLAINED_ABBREVIATION)
        .filter_map(|d| extract_acronym_token(&d.message).map(|token| (token, d)));
    for (token, diag) in flagged {
        let (hits, files) = per_token.entry(token).or_default();
        *hits += 1;
        files.insert(diag.location.file.to_string());
    }

    let mut candidates: Vec<(String, usize, HashSet<String>)> = per_token
        .into_iter()
        .filter(|(_, (hits, _))| *hits >= ACRONYM_HINT_THRESHOLD)
        .map(|(token, (hits, files))| (token, hits, files))
        .collect();
    // Most frequent first; alphabetical order keeps ties deterministic.
    candidates.sort_by(|lhs, rhs| rhs.1.cmp(&lhs.1).then_with(|| lhs.0.cmp(&rhs.0)));
    candidates.truncate(ACRONYM_HINT_TOP_N);

    let mut lines = Vec::with_capacity(candidates.len());
    let mut covered: HashSet<(String, String)> = HashSet::new();
    for (token, n, files) in candidates {
        lines.push(format!(
            "add \"{token}\" to [rules.{UNEXPLAINED_ABBREVIATION}].whitelist ({n} hits suppressed)"
        ));
        covered.extend(
            files
                .into_iter()
                .map(|file| (file, UNEXPLAINED_ABBREVIATION.to_string())),
        );
    }
    (lines, covered)
}
/// Produces one hint line per (file, rule) pair that accumulates at least
/// [`HOTSPOT_HINT_THRESHOLD`] diagnostics.
///
/// Pairs present in `covered` are skipped. Lines are emitted in the order in
/// which each pair first appears in `diagnostics`.
fn hotspot_hints(diagnostics: &[Diagnostic], covered: &HashSet<(String, String)>) -> Vec<String> {
    use std::collections::btree_map::Entry;

    let mut counts: BTreeMap<(String, String), usize> = BTreeMap::new();
    // Remembers first-seen order, which the sorted map would otherwise lose.
    let mut first_seen: Vec<(String, String)> = Vec::new();
    for diag in diagnostics {
        let pair = (diag.location.file.to_string(), diag.rule_id.clone());
        match counts.entry(pair) {
            Entry::Vacant(slot) => {
                first_seen.push(slot.key().clone());
                slot.insert(1);
            }
            Entry::Occupied(mut slot) => *slot.get_mut() += 1,
        }
    }

    first_seen
        .into_iter()
        .filter_map(|pair| {
            let n = counts[&pair];
            if n < HOTSPOT_HINT_THRESHOLD || covered.contains(&pair) {
                return None;
            }
            let (file, rule_id) = pair;
            Some(format!(
                "{rule_id} dominates {file} ({n} hits) — see {DOCS_BASE}/rules/{rule_id}"
            ))
        })
        .collect()
}
/// Extracts the text between the first pair of double quotes in `message`.
///
/// Returns `None` when the message has no opening quote, no closing quote
/// after it, or nothing between the two.
fn extract_acronym_token(message: &str) -> Option<String> {
    let (_, after_open) = message.split_once('"')?;
    let (token, _) = after_open.split_once('"')?;
    (!token.is_empty()).then(|| token.to_string())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{Location, Severity, SourceFile};

    /// Builds a warning-severity diagnostic for `rule_id` in `file` at a
    /// fixed placeholder location.
    fn diag(rule_id: &str, file: SourceFile, message: &str) -> Diagnostic {
        Diagnostic::new(
            rule_id,
            Severity::Warning,
            Location::new(file, 1, 1, 1),
            message,
        )
    }

    /// Builds an unexplained-abbreviation diagnostic whose message quotes
    /// `token`, matching the shape `extract_acronym_token` parses.
    fn acronym(file: SourceFile, token: &str) -> Diagnostic {
        diag(
            UNEXPLAINED_ABBREVIATION,
            file,
            &format!("Acronym \"{token}\" is not defined on first use."),
        )
    }

    /// Same as `acronym`, attached to the anonymous source file.
    fn anon_acronym(token: &str) -> Diagnostic {
        acronym(SourceFile::Anonymous, token)
    }

    #[test]
    fn silent_when_no_diagnostics_qualify() {
        // One hit short of the acronym threshold: nothing should be hinted.
        let diags: Vec<Diagnostic> = (0..ACRONYM_HINT_THRESHOLD - 1)
            .map(|_| anon_acronym("HTTP"))
            .collect();
        assert!(hints(&diags).is_empty());
    }

    #[test]
    fn acronym_hint_fires_at_threshold() {
        let diags: Vec<Diagnostic> = (0..ACRONYM_HINT_THRESHOLD)
            .map(|_| anon_acronym("HTTP"))
            .collect();
        let lines = hints(&diags);
        assert_eq!(lines.len(), 1);
        assert!(lines[0].contains("\"HTTP\""));
        // Derive the expected count from the constant so retuning the
        // threshold does not break this test.
        assert!(lines[0].contains(&format!("{ACRONYM_HINT_THRESHOLD} hits suppressed")));
    }

    #[test]
    fn acronym_hint_caps_at_top_n_and_orders_by_count() {
        let mut diags = Vec::new();
        diags.extend((0..5).map(|_| anon_acronym("HTTP")));
        diags.extend((0..4).map(|_| anon_acronym("WCAG")));
        diags.extend((0..3).map(|_| anon_acronym("RGAA")));
        diags.extend((0..3).map(|_| anon_acronym("FALC")));
        let lines = hints(&diags);
        assert_eq!(lines.len(), ACRONYM_HINT_TOP_N);
        assert!(lines[0].contains("\"HTTP\""));
        assert!(lines[1].contains("\"WCAG\""));
        // FALC and RGAA tie on count; the alphabetical tie-break gives FALC
        // the last slot.
        assert!(lines[2].contains("\"FALC\""));
    }

    #[test]
    fn hotspot_hint_fires_at_threshold() {
        let path = SourceFile::Path(std::path::PathBuf::from("README.md"));
        let diags: Vec<Diagnostic> = (0..HOTSPOT_HINT_THRESHOLD)
            .map(|_| diag("structure.sentence-too-long", path.clone(), "msg"))
            .collect();
        let lines = hints(&diags);
        assert_eq!(lines.len(), 1);
        assert!(lines[0].contains("structure.sentence-too-long dominates"));
        assert!(lines[0].contains("README.md"));
        assert!(lines[0].contains(&format!("{HOTSPOT_HINT_THRESHOLD} hits")));
    }

    #[test]
    fn hotspot_hint_silent_below_threshold() {
        let path = SourceFile::Path(std::path::PathBuf::from("README.md"));
        let diags: Vec<Diagnostic> = (0..HOTSPOT_HINT_THRESHOLD - 1)
            .map(|_| diag("structure.sentence-too-long", path.clone(), "msg"))
            .collect();
        assert!(hints(&diags).is_empty());
    }

    #[test]
    fn acronym_path_suppresses_hotspot_for_same_file_rule_pair() {
        let path = SourceFile::Path(std::path::PathBuf::from("notes.md"));
        // Enough hits to clear both thresholds, so that without suppression
        // the same (file, rule) pair would also produce a hotspot hint.
        let hit_count = HOTSPOT_HINT_THRESHOLD.max(ACRONYM_HINT_THRESHOLD) + 2;
        let diags: Vec<Diagnostic> = (0..hit_count)
            .map(|_| acronym(path.clone(), "HTTP"))
            .collect();
        let lines = hints(&diags);
        assert_eq!(lines.len(), 1);
        assert!(lines[0].contains("whitelist"));
    }

    #[test]
    fn block_caps_at_max_hints() {
        let mut diags = Vec::new();
        // Three acronym hints plus three hotspot hints would exceed the cap.
        for token in ["HTTP", "WCAG", "RGAA"] {
            diags.extend((0..ACRONYM_HINT_THRESHOLD).map(|_| anon_acronym(token)));
        }
        for name in ["a.md", "b.md", "c.md"] {
            let path = SourceFile::Path(std::path::PathBuf::from(name));
            diags.extend(
                (0..HOTSPOT_HINT_THRESHOLD)
                    .map(|_| diag("structure.sentence-too-long", path.clone(), "msg")),
            );
        }
        let lines = hints(&diags);
        assert_eq!(lines.len(), MAX_HINTS);
    }

    #[test]
    fn extract_acronym_token_handles_canonical_message() {
        assert_eq!(
            extract_acronym_token("Acronym \"WCAG\" is not defined on first use."),
            Some("WCAG".to_string())
        );
    }

    #[test]
    fn extract_acronym_token_returns_none_on_unexpected_shape() {
        assert!(extract_acronym_token("Acronym WCAG not defined.").is_none());
        assert!(extract_acronym_token("Acronym \"\" is not defined.").is_none());
    }
}