use std::num::NonZeroU32;
use crate::condition::ConditionTag;
use crate::config::Profile;
use crate::parser::Document;
use crate::rules::Rule;
use crate::types::{Diagnostic, Language, Location, Severity};
/// Tunable threshold for the all-caps-shouting rule.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Config {
/// Minimum number of consecutive ALL-CAPS words that make up a reportable
/// run. Shorter runs are discarded without a diagnostic.
pub min_run_length: NonZeroU32,
}
impl Config {
    /// Builds the configuration associated with `profile`.
    ///
    /// `Profile::DevDoc` requires a run of three ALL-CAPS words before the
    /// rule fires; `Profile::Public` and `Profile::Falc` fire on two.
    ///
    /// # Panics
    ///
    /// Only if one of the hard-coded thresholds were zero, which none is.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        let threshold: u32 = match profile {
            Profile::Public | Profile::Falc => 2,
            Profile::DevDoc => 3,
        };
        let min_run_length = NonZeroU32::new(threshold).expect("non-zero literal");
        Self { min_run_length }
    }
}
/// Lint rule that reports runs of consecutive ALL-CAPS words in prose.
#[derive(Debug, Clone, Copy)]
pub struct AllCapsShouting {
/// Threshold settings applied when the rule checks a document.
config: Config,
}
impl AllCapsShouting {
    /// Stable identifier under which this rule reports its diagnostics.
    pub const ID: &'static str = "lexicon.all-caps-shouting";

    /// Condition tags attached to this rule.
    pub const TAGS: &'static [ConditionTag] = &[
        ConditionTag::A11yMarkup,
        ConditionTag::Dyslexia,
        ConditionTag::General,
    ];

    /// Creates the rule from an explicit configuration.
    #[must_use]
    pub const fn new(config: Config) -> Self {
        Self { config }
    }

    /// Creates the rule with the thresholds that `Config::for_profile`
    /// selects for `profile`.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        let config = Config::for_profile(profile);
        Self { config }
    }
}
impl Rule for AllCapsShouting {
    /// Returns the rule identifier, `AllCapsShouting::ID`.
    fn id(&self) -> &'static str {
        Self::ID
    }

    /// Returns the condition tags of the rule, `AllCapsShouting::TAGS`.
    fn condition_tags(&self) -> &'static [ConditionTag] {
        Self::TAGS
    }

    /// Emits one warning for every run of consecutive ALL-CAPS words found in
    /// the paragraphs of `document`.
    ///
    /// A run is reported when it contains at least `config.min_run_length`
    /// words. The diagnostic is located at the first word of the run and is
    /// annotated with the section title when the paragraph has one. The
    /// language argument is not consulted.
    fn check(&self, document: &Document, _language: Language) -> Vec<Diagnostic> {
        let min_run = self.config.min_run_length.get();
        let mut diagnostics = Vec::new();
        for (paragraph, section_title) in document.paragraphs_with_section() {
            for run in find_caps_runs(&paragraph.text, min_run) {
                // Run offsets are relative to the paragraph text, so shift
                // them by the paragraph's starting line.
                let line = paragraph.start_line + run.line_offset;
                let column = run.column;
                // NOTE(review): `run.text` is the abbreviated "first…last"
                // label, so this length is the label's character count, not
                // the width of the run in the source — confirm this is the
                // intended highlight length.
                let length = u32::try_from(run.text.chars().count()).unwrap_or(u32::MAX);
                let location = Location::new(document.source.clone(), line, column, length);
                let message = format!(
                    "{} consecutive ALL-CAPS words read as shouting and degrade legibility for \
                     dyslexic readers. Use sentence case and rely on emphasis (italics, bold) or \
                     a callout instead.",
                    run.word_count
                );
                let mut diag = Diagnostic::new(Self::ID, Severity::Warning, location, message);
                if let Some(title) = section_title {
                    diag = diag.with_section(title);
                }
                diagnostics.push(diag);
            }
        }
        diagnostics
    }
}
/// One detected run of consecutive ALL-CAPS words.
#[derive(Debug)]
struct Run {
/// Abbreviated label of the run: its first and last word joined by `…`
/// (built in `flush_run`).
text: String,
/// Number of ALL-CAPS words in the run.
word_count: u32,
/// Zero-based index of the line, within the scanned text, on which the
/// first word of the run sits.
line_offset: u32,
/// One-based column, counted in characters, of the run's first word.
column: u32,
}
/// Scans `text` line by line and collects every run of at least `min_run`
/// consecutive ALL-CAPS words.
///
/// A word is a maximal sequence of characters accepted by `is_word_char`.
/// Commas, semicolons, colons, hyphens, spaces and tabs between words keep a
/// run alive. Any other non-word character, a word that is not ALL-CAPS, or
/// the end of a line terminates it. Line offsets are zero-based within `text`;
/// columns are one-based and counted in characters.
fn find_caps_runs(text: &str, min_run: u32) -> Vec<Run> {
    let mut found = Vec::new();
    // Words of the run being accumulated: (slice, line offset, start column).
    let mut pending: Vec<(&str, u32, u32)> = Vec::new();

    for (row, line) in text.lines().enumerate() {
        let row = u32::try_from(row).unwrap_or(u32::MAX);
        // `enumerate` yields the zero-based character position, from which
        // the one-based column is derived.
        let mut cursor = line.char_indices().enumerate().peekable();

        while let Some((position, (start, ch))) = cursor.next() {
            if !is_word_char(ch) {
                let keeps_run_alive = matches!(ch, ',' | ';' | ':' | '-' | ' ' | '\t');
                if !keeps_run_alive {
                    flush_run(&mut pending, min_run, &mut found);
                }
                continue;
            }

            // Extend the word over every directly following word character.
            let mut end = start + ch.len_utf8();
            while let Some((_, (offset, next))) = cursor.next_if(|&(_, (_, c))| is_word_char(c)) {
                end = offset + next.len_utf8();
            }

            let word = &line[start..end];
            if is_all_caps_word(word) {
                let column = u32::try_from(position.saturating_add(1)).unwrap_or(u32::MAX);
                pending.push((word, row, column));
            } else {
                flush_run(&mut pending, min_run, &mut found);
            }
        }

        // A run never continues across a line break.
        flush_run(&mut pending, min_run, &mut found);
    }

    // Nothing is pending here because each line ends with a flush; kept as a
    // final safety net.
    flush_run(&mut pending, min_run, &mut found);
    found
}
/// Closes the run accumulated in `current`.
///
/// When `current` holds at least `min_run` words, a `Run` positioned at the
/// first word and labelled `first…last` is appended to `runs`. `current` is
/// emptied in every case so the next run starts fresh.
fn flush_run(current: &mut Vec<(&str, u32, u32)>, min_run: u32, runs: &mut Vec<Run>) {
    let word_count = u32::try_from(current.len()).unwrap_or(u32::MAX);
    if word_count >= min_run {
        // `first`/`last` are both `None` for an empty buffer, which only
        // passes the threshold when `min_run` is zero.
        if let (Some(&(head, line_offset, column)), Some(&(tail, _, _))) =
            (current.first(), current.last())
        {
            let text = format!("{}…{}", head, tail);
            runs.push(Run {
                text,
                word_count,
                line_offset,
                column,
            });
        }
    }
    current.clear();
}
/// Returns `true` for characters that belong to a word: any alphanumeric
/// character, plus the ASCII apostrophe so contractions stay in one piece.
fn is_word_char(c: char) -> bool {
    match c {
        '\'' => true,
        other => other.is_alphanumeric(),
    }
}
/// Returns `true` when `word` is written in capitals: it contains at least
/// two uppercase letters and no lowercase letter.
///
/// Digits, apostrophes and letters from scripts without case (CJK, Arabic,
/// Hebrew, …) are neither uppercase nor lowercase, so they are ignored. A
/// word made only of such characters is therefore never treated as ALL-CAPS.
/// Single capital letters such as "A" or "I" do not qualify either.
fn is_all_caps_word(word: &str) -> bool {
    let mut uppercase = 0u32;
    for c in word.chars() {
        if c.is_lowercase() {
            // One lowercase letter is enough to rule the word out.
            return false;
        }
        if c.is_uppercase() {
            uppercase = uppercase.saturating_add(1);
        }
    }
    uppercase >= 2
}
// Unit tests for the all-caps-shouting rule: metadata, per-profile
// thresholds, run-breaking behaviour and a snapshot of the emitted
// diagnostics.
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::{parse_markdown, parse_plain};
use crate::types::{Category, SourceFile};
// Parses `text` as plain text and runs the rule configured for `profile`.
fn lint(text: &str, profile: Profile) -> Vec<Diagnostic> {
let document = parse_plain(text, SourceFile::Anonymous);
AllCapsShouting::for_profile(profile).check(&document, Language::En)
}
// Same as `lint`, but parses `text` as Markdown.
fn lint_md(text: &str, profile: Profile) -> Vec<Diagnostic> {
let document = parse_markdown(text, SourceFile::Anonymous);
AllCapsShouting::for_profile(profile).check(&document, Language::En)
}
// Pins the public rule identifier.
#[test]
fn id_is_kebab_case() {
assert_eq!(AllCapsShouting::ID, "lexicon.all-caps-shouting");
}
// Pins the three condition tags the rule advertises.
#[test]
fn tags_carry_a11y_markup_dyslexia_general() {
assert!(AllCapsShouting::TAGS.contains(&ConditionTag::A11yMarkup));
assert!(AllCapsShouting::TAGS.contains(&ConditionTag::Dyslexia));
assert!(AllCapsShouting::TAGS.contains(&ConditionTag::General));
}
// Diagnostics from this rule must report the lexicon category.
#[test]
fn category_is_lexicon() {
let text = "DO NOT TOUCH the wires.";
let diags = lint(text, Profile::Public);
assert!(!diags.is_empty());
assert_eq!(diags[0].category(), Category::Lexicon);
}
// A lone capitalised word is below every profile's threshold.
#[test]
fn single_all_caps_word_is_an_abbreviation_not_shouting() {
assert!(lint("The WCAG standard helps.", Profile::Public).is_empty());
}
// The public profile fires on a run of two words and reports the count.
#[test]
fn two_consecutive_all_caps_words_trigger_under_public() {
let diags = lint("Please DO NOT touch this.", Profile::Public);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("2 consecutive"));
}
// The dev-doc profile needs three words before it fires.
#[test]
fn dev_doc_tolerates_a_two_word_run() {
assert!(lint("Please DO NOT touch this.", Profile::DevDoc).is_empty());
let diags = lint("Please DO NOT EVER touch this.", Profile::DevDoc);
assert_eq!(diags.len(), 1);
}
// The FALC profile shares the two-word threshold.
#[test]
fn falc_flags_two_word_runs() {
let diags = lint("Please DO NOT touch this.", Profile::Falc);
assert_eq!(diags.len(), 1);
}
// A comma between capitalised words does not split the run.
#[test]
fn all_caps_run_with_minor_punctuation_stays_together() {
let diags = lint("Please READ ME, THEN ACT.", Profile::Public);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("4 consecutive"));
}
// A lowercase word between two capitalised words ends the run.
#[test]
fn lowercase_word_breaks_the_run() {
let diags = lint("DO and NOT.", Profile::Public);
assert!(diags.is_empty());
}
// A full stop is not a soft separator, so it ends the run.
#[test]
fn period_breaks_the_run() {
let diags = lint("Please DO. NOT acceptable.", Profile::Public);
assert!(diags.is_empty());
}
// An acronym followed by ordinary prose stays a run of one.
#[test]
fn acronym_followed_by_real_word_does_not_count() {
assert!(lint("WCAG covers many cases.", Profile::Public).is_empty());
}
// Two acronyms with a lowercase word in between are two runs of one.
#[test]
fn two_acronyms_separated_by_a_word_do_not_form_a_run() {
assert!(lint("WCAG and ARIA help readers.", Profile::Public).is_empty());
}
// Capitalised text inside a fenced Markdown code block yields no diagnostics.
#[test]
fn fenced_code_block_content_is_ignored() {
let md = "Intro paragraph.\n\n```\nDO NOT TOUCH\n```\n\nMore prose.\n";
assert!(lint_md(md, Profile::Public).is_empty());
}
// Known limitation pinned on purpose: a chain of acronyms counts as a run.
#[test]
fn multiword_acronym_chain_in_plain_prose_triggers() {
let diags = lint("Configure API HTTP TLS routing.", Profile::Public);
assert_eq!(diags.len(), 1);
}
// Pins the per-profile thresholds chosen by `Config::for_profile`.
#[test]
fn config_thresholds_are_as_documented() {
assert_eq!(Config::for_profile(Profile::DevDoc).min_run_length.get(), 3);
assert_eq!(Config::for_profile(Profile::Public).min_run_length.get(), 2);
assert_eq!(Config::for_profile(Profile::Falc).min_run_length.get(), 2);
}
// Snapshot of the full diagnostic output; the file location is redacted so
// the snapshot does not depend on where the input came from.
#[test]
fn snapshot_fixture() {
let text = "Short and clean. Please DO NOT touch this. Fine again.";
let diags = lint(text, Profile::Public);
insta::assert_yaml_snapshot!(diags, {
".*.location.file" => "<input>",
});
}
}