use std::num::NonZeroU32;
use unicode_segmentation::UnicodeSegmentation;
use crate::condition::ConditionTag;
use crate::config::Profile;
use crate::parser::Document;
use crate::rules::Rule;
use crate::types::{Diagnostic, Language, Location, Severity};
/// Tunable threshold for the consonant-cluster rule.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Config {
    /// Shortest run of consecutive consonants inside one word that is
    /// reported; words whose longest run is below this are left alone.
    /// The type guarantees the threshold is never zero.
    pub min_run_length: NonZeroU32,
}
impl Config {
    /// Builds the configuration associated with `profile`.
    ///
    /// The minimum reported run length is 4 for [`Profile::Falc`], 5 for
    /// [`Profile::Public`] and 6 for [`Profile::DevDoc`].
    ///
    /// # Panics
    ///
    /// Only if one of the hard-coded thresholds were zero, which none is.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        let threshold = match profile {
            Profile::Falc => 4,
            Profile::Public => 5,
            Profile::DevDoc => 6,
        };
        let min_run_length = NonZeroU32::new(threshold).expect("non-zero literal");
        Self { min_run_length }
    }
}
/// Lint rule that reports words containing a long run of consecutive
/// consonants.
#[derive(Debug, Clone, Copy)]
pub struct ConsonantCluster {
    /// Threshold settings applied by this rule instance.
    config: Config,
}
impl ConsonantCluster {
    /// Identifier under which this rule reports its diagnostics.
    pub const ID: &'static str = "lexicon.consonant-cluster";

    /// Condition tags attached to this rule.
    pub const TAGS: &'static [ConditionTag] = &[ConditionTag::Dyslexia, ConditionTag::General];

    /// Creates the rule from an explicit configuration.
    #[must_use]
    pub const fn new(config: Config) -> Self {
        Self { config }
    }

    /// Creates the rule using the configuration that [`Config::for_profile`]
    /// yields for `profile`.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        let config = Config::for_profile(profile);
        Self { config }
    }
}
impl Rule for ConsonantCluster {
    /// Returns the rule identifier, `ConsonantCluster::ID`.
    fn id(&self) -> &'static str {
        Self::ID
    }

    /// Returns the condition tags of the rule, `ConsonantCluster::TAGS`.
    fn condition_tags(&self) -> &'static [ConditionTag] {
        Self::TAGS
    }

    /// Scans every line of every paragraph in `document` and returns one
    /// warning per word whose longest consonant run reaches the configured
    /// minimum.
    ///
    /// Each diagnostic is located at the flagged word: the line is the
    /// paragraph's start line plus the line's offset within the paragraph,
    /// and both column and length are expressed in grapheme clusters. When
    /// the paragraph belongs to a titled section, the title is attached.
    /// Values that do not fit in `u32` saturate at `u32::MAX`.
    fn check(&self, document: &Document, language: Language) -> Vec<Diagnostic> {
        let min = self.config.min_run_length.get();
        let mut diagnostics = Vec::new();
        for (paragraph, section_title) in document.paragraphs_with_section() {
            for (line_offset, line) in paragraph.text.lines().enumerate() {
                // Depends only on the line, so compute it once rather than
                // once per hit.
                let line_number = paragraph
                    .start_line
                    .saturating_add(u32::try_from(line_offset).unwrap_or(u32::MAX));
                for hit in find_clusters(line, language, min) {
                    let column = u32::try_from(hit.column).unwrap_or(u32::MAX);
                    // The column is a grapheme index, so the length must be
                    // counted in graphemes as well; counting `char`s would
                    // overstate the span for words with combining marks.
                    let length =
                        u32::try_from(hit.word.graphemes(true).count()).unwrap_or(u32::MAX);
                    let location =
                        Location::new(document.source.clone(), line_number, column, length);
                    let message = format!(
                        "Word \"{}\" contains a run of {} consecutive consonants. Dense \
                         consonant clusters are a decoding barrier for dyslexic readers \
                         (BDA Dyslexia Style Guide). Consider a shorter or more common \
                         synonym.",
                        hit.word, hit.run_length
                    );
                    let mut diag = Diagnostic::new(Self::ID, Severity::Warning, location, message);
                    if let Some(title) = section_title {
                        diag = diag.with_section(title);
                    }
                    diagnostics.push(diag);
                }
            }
        }
        diagnostics
    }
}
/// One flagged word found on a line.
#[derive(Debug)]
struct Hit {
    /// Full text of the flagged word.
    word: String,
    /// Length of the longest consonant run inside `word`.
    run_length: u32,
    /// 1-based position of the word's first grapheme within its line.
    column: usize,
}
/// Collects every word of `line` whose longest consonant run is at least
/// `min`.
///
/// The line is walked grapheme by grapheme, and each grapheme is classified
/// by its first `char` only. A word is a maximal sequence of graphemes
/// accepted by `is_word_char`; anything else ends the current word. Hits are
/// returned in line order, each carrying the word text, its longest run, and
/// the 1-based grapheme column where it starts.
fn find_clusters(line: &str, language: Language, min: u32) -> Vec<Hit> {
    let mut found = Vec::new();
    // Start column and text gathered so far for the word being scanned.
    let mut pending: Option<(usize, String)> = None;
    let mut streak = 0u32;
    let mut longest = 0u32;

    for (index, cluster) in line.graphemes(true).enumerate() {
        let lead = cluster.chars().next().unwrap_or(' ');

        if !is_word_char(lead) {
            // A separator closes the word in progress, if there is one.
            if let Some((column, word)) = pending.take() {
                if longest >= min {
                    found.push(Hit {
                        word,
                        run_length: longest,
                        column,
                    });
                }
            }
            streak = 0;
            longest = 0;
            continue;
        }

        pending
            .get_or_insert_with(|| (index + 1, String::new()))
            .1
            .push_str(cluster);

        if is_consonant(lead, language) {
            streak += 1;
            longest = longest.max(streak);
        } else {
            streak = 0;
        }
    }

    // The line may end in the middle of a word.
    if let Some((column, word)) = pending {
        if longest >= min {
            found.push(Hit {
                word,
                run_length: longest,
                column,
            });
        }
    }
    found
}
/// Tells whether `c` can be part of a word, i.e. whether it is alphabetic.
/// Digits, hyphens, apostrophes and whitespace are therefore separators.
fn is_word_char(c: char) -> bool {
    char::is_alphabetic(c)
}
/// Tells whether `c` counts as a consonant in `language`: an alphabetic
/// character that `is_vowel` does not recognise as a vowel.
fn is_consonant(c: char, language: Language) -> bool {
    c.is_alphabetic() && !is_vowel(c, language)
}
/// Tells whether `c` counts as a vowel in `language`, ignoring case.
///
/// The plain vowels (with `y`) and the common accented vowels are vowels in
/// every language. The letters `æ`, `œ` and `ÿ` are vowels only for
/// [`Language::Fr`].
fn is_vowel(c: char, language: Language) -> bool {
    // Compare on the first scalar of the lowercase mapping, falling back to
    // the character itself.
    let folded = c.to_lowercase().next().unwrap_or(c);
    match folded {
        // Vowels shared by every supported language.
        'a' | 'e' | 'i' | 'o' | 'u' | 'y' | 'à' | 'â' | 'ä' | 'é' | 'è' | 'ê' | 'ë' | 'î'
        | 'ï' | 'ô' | 'ö' | 'ù' | 'û' | 'ü' => true,
        // Letters recognised as vowels for French only.
        'æ' | 'œ' | 'ÿ' => match language {
            Language::Fr => true,
            Language::En | Language::Unknown => false,
        },
        _ => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::{parse_markdown, parse_plain};
    use crate::types::{Category, SourceFile};

    /// Runs the rule for `profile` over `text` parsed as plain text.
    fn lint(text: &str, profile: Profile, language: Language) -> Vec<Diagnostic> {
        let document = parse_plain(text, SourceFile::Anonymous);
        ConsonantCluster::for_profile(profile).check(&document, language)
    }

    /// Runs the rule for `profile` over `text` parsed as Markdown.
    fn lint_md(text: &str, profile: Profile, language: Language) -> Vec<Diagnostic> {
        let document = parse_markdown(text, SourceFile::Anonymous);
        ConsonantCluster::for_profile(profile).check(&document, language)
    }

    #[test]
    fn id_is_kebab_case() {
        assert_eq!(ConsonantCluster::ID, "lexicon.consonant-cluster");
    }

    #[test]
    fn tags_carry_dyslexia_and_general() {
        assert!(ConsonantCluster::TAGS.contains(&ConditionTag::Dyslexia));
        assert!(ConsonantCluster::TAGS.contains(&ConditionTag::General));
    }

    #[test]
    fn category_is_lexicon() {
        let diags = lint("The strengths matter.", Profile::Public, Language::En);
        assert_eq!(diags.len(), 1);
        assert_eq!(diags[0].category(), Category::Lexicon);
    }

    #[test]
    fn common_words_do_not_trigger_under_public() {
        assert!(lint(
            "The quick brown fox jumps over the lazy dog.",
            Profile::Public,
            Language::En
        )
        .is_empty());
    }

    #[test]
    fn strengths_triggers_under_public() {
        // "strengths" ends in the five-consonant run "ngths".
        let diags = lint("Our strengths matter.", Profile::Public, Language::En);
        assert_eq!(diags.len(), 1);
        assert!(diags[0].message.contains("\"strengths\""));
        assert!(diags[0].message.contains("5 consecutive"));
    }

    #[test]
    fn falc_catches_shorter_runs() {
        // Runs of three stay below the Falc threshold of four.
        assert!(lint("A strict test.", Profile::Falc, Language::En).is_empty());
        assert!(lint("It shrinks.", Profile::Falc, Language::En).is_empty());
        let diags = lint("The twelfths shifted.", Profile::Falc, Language::En);
        assert_eq!(diags.len(), 1);
        assert!(diags[0].message.contains("\"twelfths\""));
    }

    #[test]
    fn dev_doc_tolerates_five_run() {
        assert!(!lint("Strengths.", Profile::Public, Language::En).is_empty());
        assert!(lint("Strengths.", Profile::DevDoc, Language::En).is_empty());
    }

    #[test]
    fn hyphen_breaks_the_word() {
        assert!(
            lint("A dys-lexic reader.", Profile::Falc, Language::En).is_empty(),
            "hyphen must break the word"
        );
        // "dyslexic" stays below the threshold even unsplit, so also check a
        // pair whose joined form reaches it ("rchr" is a run of four).
        assert_eq!(
            lint("An archrival.", Profile::Falc, Language::En).len(),
            1,
            "control: the unhyphenated word must trigger"
        );
        assert!(
            lint("An arch-rival.", Profile::Falc, Language::En).is_empty(),
            "hyphen must break the word"
        );
    }

    #[test]
    fn apostrophe_breaks_the_word() {
        // Joined, "rocknroll" would contain the four-consonant run "cknr".
        assert!(lint("rock'n'roll", Profile::Falc, Language::En).is_empty());
    }

    #[test]
    fn fr_accented_vowel_is_not_a_consonant() {
        assert!(lint("C'est étranger.", Profile::Falc, Language::Fr).is_empty());
        // "très" would be a run of four if "è" were counted as a consonant,
        // so this input fails as soon as accented vowels are misclassified.
        assert!(lint("C'est très clair.", Profile::Falc, Language::Fr).is_empty());
    }

    #[test]
    fn fr_real_cluster_triggers() {
        let diags = lint(
            "Les constructions sont claires.",
            Profile::Falc,
            Language::Fr,
        );
        assert_eq!(diags.len(), 1);
        assert!(diags[0].message.contains("\"constructions\""));
    }

    #[test]
    fn uppercase_words_also_trigger() {
        let diags = lint("STRENGTHS.", Profile::Public, Language::En);
        assert_eq!(diags.len(), 1);
        // Pin the run length: if the uppercase "E" were not recognised as a
        // vowel the whole word would count as a single run of nine.
        assert!(diags[0].message.contains("5 consecutive"));
    }

    #[test]
    fn y_is_treated_as_a_vowel() {
        // With "y" as a consonant, "rhythm" would be a run of six.
        assert!(lint("The rhythm.", Profile::Falc, Language::En).is_empty());
    }

    #[test]
    fn fenced_code_block_content_is_ignored() {
        let md = "Intro.\n\n```\nstrengths twelfths\n```\n\nPlain prose.\n";
        assert!(lint_md(md, Profile::Public, Language::En).is_empty());
    }

    #[test]
    fn config_thresholds_are_as_documented() {
        assert_eq!(Config::for_profile(Profile::DevDoc).min_run_length.get(), 6);
        assert_eq!(Config::for_profile(Profile::Public).min_run_length.get(), 5);
        assert_eq!(Config::for_profile(Profile::Falc).min_run_length.get(), 4);
    }

    #[test]
    fn snapshot_fixture() {
        let text = "Our strengths and twelfths shifted. All clear.";
        let diags = lint(text, Profile::Public, Language::En);
        insta::assert_yaml_snapshot!(diags, {
            ".*.location.file" => "<input>",
        });
    }
}