use std::num::NonZeroU32;
use unicode_segmentation::UnicodeSegmentation;
use crate::condition::ConditionTag;
use crate::config::Profile;
use crate::parser::Document;
use crate::rules::Rule;
use crate::types::{Diagnostic, Language, Location, Severity};
/// Thresholds that decide when nearby punctuation marks count as a dense burst.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Config {
    /// Minimum number of qualifying marks that must fall inside one window.
    pub min_marks: NonZeroU32,
    /// Width of the window, counted in grapheme clusters.
    pub window_graphemes: NonZeroU32,
}
impl Config {
    /// Returns the thresholds used for `profile`.
    ///
    /// | Profile  | `min_marks` | `window_graphemes` |
    /// |----------|-------------|--------------------|
    /// | `DevDoc` | 4           | 30                 |
    /// | `Public` | 3           | 30                 |
    /// | `Falc`   | 3           | 40                 |
    ///
    /// # Panics
    ///
    /// Never in practice: every literal passed to the helper below is non-zero.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        let non_zero = |value: u32| NonZeroU32::new(value).expect("non-zero literal");
        match profile {
            Profile::DevDoc => Self {
                min_marks: non_zero(4),
                window_graphemes: non_zero(30),
            },
            Profile::Public => Self {
                min_marks: non_zero(3),
                window_graphemes: non_zero(30),
            },
            Profile::Falc => Self {
                min_marks: non_zero(3),
                window_graphemes: non_zero(40),
            },
        }
    }
}
/// Lint rule that reports runs of punctuation marks packed into a short span of a line.
#[derive(Debug, Clone, Copy)]
pub struct DensePunctuationBurst {
    /// Thresholds applied when scanning each line.
    config: Config,
}
impl DensePunctuationBurst {
    /// Identifier under which this rule reports its diagnostics.
    pub const ID: &'static str = "syntax.dense-punctuation-burst";

    /// Condition tags associated with this rule.
    pub const TAGS: &'static [ConditionTag] = &[ConditionTag::General];

    /// Builds the rule from explicit thresholds.
    #[must_use]
    pub const fn new(config: Config) -> Self {
        Self { config }
    }

    /// Builds the rule with the thresholds that [`Config::for_profile`] selects for `profile`.
    #[must_use]
    pub fn for_profile(profile: Profile) -> Self {
        let config = Config::for_profile(profile);
        Self::new(config)
    }
}
impl Rule for DensePunctuationBurst {
    /// Returns the rule identifier, [`DensePunctuationBurst::ID`].
    fn id(&self) -> &'static str {
        Self::ID
    }

    /// Returns the rule's condition tags, [`DensePunctuationBurst::TAGS`].
    fn condition_tags(&self) -> &'static [ConditionTag] {
        Self::TAGS
    }

    /// Scans each line of each paragraph and emits one warning per punctuation burst.
    ///
    /// Every diagnostic points at the burst's line, starting column and length, and
    /// carries the section title when the paragraph has one. The language argument
    /// is not consulted.
    fn check(&self, document: &Document, _language: Language) -> Vec<Diagnostic> {
        // Positions that do not fit in `u32` are clamped to `u32::MAX` instead of wrapping.
        let clamp = |value: usize| u32::try_from(value).unwrap_or(u32::MAX);
        let (min, window) = (
            self.config.min_marks.get() as usize,
            self.config.window_graphemes.get() as usize,
        );
        let mut diagnostics = Vec::new();
        for (paragraph, section_title) in document.paragraphs_with_section() {
            for (line_offset, line) in paragraph.text.lines().enumerate() {
                // The line number is the same for every burst found on this line.
                let line_number = paragraph.start_line.saturating_add(clamp(line_offset));
                for burst in find_bursts(line, min, window) {
                    let location = Location::new(
                        document.source.clone(),
                        line_number,
                        clamp(burst.start_column),
                        clamp(burst.length),
                    );
                    let message = format!(
                        "{} punctuation marks within {} characters create a dense burst that is \
                         hard to parse. Split the clause, drop the parenthetical, or rewrite as \
                         a list (IFLA easy-to-read guidelines).",
                        burst.mark_count, burst.length
                    );
                    let diag = Diagnostic::new(Self::ID, Severity::Warning, location, message);
                    diagnostics.push(match section_title {
                        Some(title) => diag.with_section(title),
                        None => diag,
                    });
                }
            }
        }
        diagnostics
    }
}
/// One dense run of qualifying punctuation marks found on a single line.
#[derive(Debug, PartialEq, Eq)]
struct Burst {
    /// 1-based grapheme column of the first mark in the burst.
    start_column: usize,
    /// Number of graphemes from the first mark to the last mark, both included.
    length: usize,
    /// How many qualifying marks the burst contains.
    mark_count: usize,
}
/// Finds the non-overlapping dense punctuation bursts on a single line.
///
/// Qualifying marks (see `is_qualifying_mark`) are located by their 1-based
/// grapheme column. Scanning left to right, a burst begins at the first mark
/// for which `min` consecutive marks span at most `window` graphemes, counting
/// both end marks. The burst is then extended with every following mark that
/// still lies within `window` graphemes of the burst's first mark, and
/// scanning resumes after the burst's last mark, so bursts never overlap.
///
/// Returns an empty vector when the line holds fewer than `min` marks, when no
/// group of `min` marks fits in the window, or when `min` is zero.
fn find_bursts(line: &str, min: usize, window: usize) -> Vec<Burst> {
    let mut bursts = Vec::new();
    // A burst of zero marks is meaningless, and `min - 1` below would underflow
    // (a panic in debug builds), so reject it up front.
    if min == 0 {
        return bursts;
    }
    // 1-based grapheme columns of every qualifying mark, in ascending order.
    let columns: Vec<usize> = line
        .graphemes(true)
        .enumerate()
        .filter(|&(_, grapheme)| is_qualifying_mark(grapheme))
        .map(|(idx, _)| idx + 1)
        .collect();
    let mut start = 0;
    // Continue while `min` marks remain from `start`; this also handles lines
    // that contain fewer than `min` marks overall.
    while start + min <= columns.len() {
        let end = start + min - 1;
        let span = columns[end] - columns[start] + 1;
        if span > window {
            // The first `min` marks from here are too spread out; try the next mark.
            start += 1;
            continue;
        }
        // Greedily absorb later marks that still fit in the window anchored at `start`.
        let mut last = end;
        while last + 1 < columns.len() && columns[last + 1] - columns[start] < window {
            last += 1;
        }
        bursts.push(Burst {
            start_column: columns[start],
            length: columns[last] - columns[start] + 1,
            mark_count: last - start + 1,
        });
        // Resume after the burst so the next one cannot overlap it.
        start = last + 1;
    }
    bursts
}
/// Reports whether `grapheme` is exactly one of the marks that count toward a
/// burst: comma, semicolon, colon, em dash or en dash.
fn is_qualifying_mark(grapheme: &str) -> bool {
    const MARKS: [&str; 5] = [",", ";", ":", "—", "–"];
    MARKS.iter().any(|&mark| mark == grapheme)
}
// Unit tests for the dense-punctuation-burst rule: identity constants,
// per-profile thresholds, which marks qualify, and burst segmentation.
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::{parse_markdown, parse_plain};
use crate::types::{Category, SourceFile};
// Runs the rule over `text` parsed as plain text, using the thresholds for `profile`.
fn lint(text: &str, profile: Profile) -> Vec<Diagnostic> {
let document = parse_plain(text, SourceFile::Anonymous);
DensePunctuationBurst::for_profile(profile).check(&document, Language::En)
}
// Same as `lint`, but parses `text` as Markdown first.
fn lint_md(text: &str, profile: Profile) -> Vec<Diagnostic> {
let document = parse_markdown(text, SourceFile::Anonymous);
DensePunctuationBurst::for_profile(profile).check(&document, Language::En)
}
// Pins the public rule identifier.
#[test]
fn id_is_kebab_case() {
assert_eq!(DensePunctuationBurst::ID, "syntax.dense-punctuation-burst");
}
// Pins the rule's condition tags.
#[test]
fn tag_is_general() {
assert_eq!(DensePunctuationBurst::TAGS, &[ConditionTag::General]);
}
// The diagnostic's category is expected to be `Category::Syntax`.
#[test]
fn category_is_syntax() {
let diags = lint("a, b; c: done.", Profile::Public);
assert_eq!(diags.len(), 1);
assert_eq!(diags[0].category(), Category::Syntax);
}
// Public requires 3 marks; comma, semicolon and colon sit well inside the window.
#[test]
fn three_marks_in_short_span_triggers_under_public() {
let diags = lint("a, b; c: done.", Profile::Public);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("3 punctuation marks"));
}
// Two marks are below the Public minimum of 3.
#[test]
fn two_marks_do_not_trigger_under_public() {
assert!(lint("a, b; done.", Profile::Public).is_empty());
}
// Three marks are present, but they are spread over more graphemes than the window allows.
#[test]
fn marks_spread_across_long_span_do_not_trigger() {
let text = "First clause runs long, second clause also runs long; \
third clause finally arrives: done.";
assert!(lint(text, Profile::Public).is_empty());
}
// DevDoc requires 4 marks, so a 3-mark burst that trips Public is accepted.
#[test]
fn dev_doc_is_more_tolerant() {
let text = "a, b; c: done.";
assert!(!lint(text, Profile::Public).is_empty());
assert!(lint(text, Profile::DevDoc).is_empty());
}
// Four marks in a short span reach the DevDoc minimum.
#[test]
fn dev_doc_triggers_on_four_marks_in_window() {
let diags = lint("a, b; c: d, done.", Profile::DevDoc);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("4 punctuation marks"));
}
// The commas sit at columns 4, 23 and 41, a span of 38 graphemes:
// too wide for Public's 30-grapheme window but inside Falc's 40.
#[test]
fn falc_window_is_wider_than_public() {
let text = "abc,defghijklmnopqrstu,vwxyzabcdefghijkl,end.";
assert!(lint(text, Profile::Public).is_empty());
assert!(!lint(text, Profile::Falc).is_empty());
}
// A comma, an em dash and an en dash together make up the 3 required marks.
#[test]
fn em_dash_and_en_dash_qualify() {
let diags = lint("clause, sub — sub – tail.", Profile::Public);
assert_eq!(diags.len(), 1);
}
// Full stops are not counted as qualifying marks.
#[test]
fn period_does_not_qualify() {
assert!(lint("Done. Stop. End.", Profile::Public).is_empty());
}
// Parentheses are not counted as qualifying marks.
#[test]
fn parenthesis_does_not_qualify() {
assert!(lint("(a) (b) (c)", Profile::Public).is_empty());
}
// Two separate clusters on one line, divided by a long run of underscores,
// must be reported as two distinct bursts.
#[test]
fn each_burst_emits_one_diagnostic_no_overlap() {
let text = "a, b; c: done. ____________________________________ then x, y; z: end.";
let diags = lint(text, Profile::Public);
assert_eq!(diags.len(), 2);
}
// Commas inside a fenced code block must not produce diagnostics.
#[test]
fn fenced_code_block_content_is_ignored() {
let md = "Intro.\n\n```\nfn f(a, b, c, d) {}\n```\n\nMore prose.\n";
assert!(lint_md(md, Profile::Public).is_empty());
}
// Pins the (min_marks, window_graphemes) pair for every profile.
#[test]
fn config_thresholds_are_as_documented() {
let dd = Config::for_profile(Profile::DevDoc);
assert_eq!(dd.min_marks.get(), 4);
assert_eq!(dd.window_graphemes.get(), 30);
let pub_ = Config::for_profile(Profile::Public);
assert_eq!(pub_.min_marks.get(), 3);
assert_eq!(pub_.window_graphemes.get(), 30);
let fa = Config::for_profile(Profile::Falc);
assert_eq!(fa.min_marks.get(), 3);
assert_eq!(fa.window_graphemes.get(), 40);
}
// Snapshot of the full diagnostic output; the file location is redacted to "<input>".
#[test]
fn snapshot_fixture() {
let text = "Short, dense; burst: here. Then a clean sentence.";
let diags = lint(text, Profile::Public);
insta::assert_yaml_snapshot!(diags, {
".*.location.file" => "<input>",
});
}
}