use crate::lint_context::LintContext;
use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
use crate::rule_config_serde::RuleConfig;
use crate::utils::range_utils::calculate_match_range;
use crate::utils::skip_context::{compute_html_code_ranges, should_skip_emphasis_span};
use serde::{Deserialize, Serialize};
/// An emphasis span that passed this rule's filters, reduced to the fields the rule needs.
#[derive(Debug, Clone, Copy)]
struct CountedSpan {
/// Byte offset copied from the source span's `byte_offset`.
start: usize,
/// Byte offset copied from the source span's `byte_end`.
end: usize,
/// Line number copied from the source span; this file indexes `ctx.lines` with `line - 1`.
line: usize,
}
/// Selects which emphasis spans the rule counts.
///
/// Variant names are (de)serialized in lowercase.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum EmphasisTarget {
/// Count only spans whose `is_strong` flag is set. This is the default.
#[default]
Strong,
/// Count only spans whose `is_strong` flag is not set.
Emphasis,
/// Count every span, collapsing spans that end within an already counted span.
All,
}
/// Configuration for MD081. Field names are (de)serialized in kebab-case.
///
/// Every field falls back to its `Default` value when missing from the input.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
pub struct MD081Config {
/// Which kind of emphasis spans are counted.
#[serde(default)]
pub targets: EmphasisTarget,
/// Maximum number of counted spans per paragraph; a paragraph is reported when it
/// holds more than this many. `None` disables the per-paragraph check.
#[serde(default)]
pub max_per_paragraph: Option<usize>,
/// Maximum length of a run of counted spans with no alphanumeric text between
/// them; a longer run is reported. `None` disables the consecutive check.
#[serde(default)]
pub max_consecutive: Option<usize>,
}
impl Default for MD081Config {
    /// Targets strong emphasis and leaves both limits unset.
    fn default() -> Self {
        let unlimited: Option<usize> = None;
        Self {
            targets: EmphasisTarget::Strong,
            max_per_paragraph: unlimited,
            max_consecutive: unlimited,
        }
    }
}
impl RuleConfig for MD081Config {
/// Rule identifier this configuration belongs to.
const RULE_NAME: &'static str = "MD081";
}
/// MD081: reports paragraphs or runs that contain more emphasis spans than configured.
#[derive(Debug, Clone, Default)]
pub struct MD081NoExcessiveEmphasis {
/// Limits and span selection used by the rule.
config: MD081Config,
}
impl MD081NoExcessiveEmphasis {
pub fn new() -> Self {
Self::default()
}
/// Creates the rule from an already built configuration.
pub fn from_config_struct(config: MD081Config) -> Self {
Self { config }
}
/// Collects the emphasis spans this rule counts, ordered by start offset.
///
/// A span is kept when it matches the configured target kind and
/// `should_skip_emphasis_span` does not reject it. Spans that start at the same
/// offset are ordered longest first. With `EmphasisTarget::All`, a span that ends
/// no later than the furthest end seen so far is dropped, so overlapping strong
/// and regular emphasis over one region counts once.
fn counted_spans(&self, ctx: &LintContext) -> Vec<CountedSpan> {
    let html_tags = ctx.html_tags();
    let html_code_ranges = compute_html_code_ranges(&html_tags);
    let targets = self.config.targets;

    let mut collected = Vec::new();
    for span in ctx.emphasis_spans().iter() {
        let wanted = match targets {
            EmphasisTarget::Strong => span.is_strong,
            EmphasisTarget::Emphasis => !span.is_strong,
            EmphasisTarget::All => true,
        };
        // The kind check runs first so the skip test only sees candidate spans.
        if !wanted || should_skip_emphasis_span(ctx, &html_tags, &html_code_ranges, span.byte_offset) {
            continue;
        }
        collected.push(CountedSpan {
            start: span.byte_offset,
            end: span.byte_end,
            line: span.line,
        });
    }

    // Outer spans sort before the spans they contain.
    collected.sort_by_key(|s| (s.start, std::cmp::Reverse(s.end)));

    if targets != EmphasisTarget::All {
        return collected;
    }

    // `retain` visits elements in order, so tracking the furthest end works in place.
    let mut furthest_end = 0usize;
    collected.retain(|s| {
        if s.end <= furthest_end {
            false
        } else {
            furthest_end = s.end;
            true
        }
    });
    collected
}
/// Flags the lines that form the text of a setext-style heading.
///
/// A line outside code blocks whose trimmed inner text consists only of `=` or
/// only of `-` is treated as an underline. Walking upward from it, every line is
/// flagged until one is blank, is not paragraph context, starts a list item, or
/// sits at a different blockquote nesting level than the underline.
fn setext_text_lines(ctx: &LintContext) -> Vec<bool> {
    let mut is_heading_text = vec![false; ctx.lines.len()];

    // The first line has nothing above it, so it can never be an underline.
    for (underline_idx, underline) in ctx.lines.iter().enumerate().skip(1) {
        if underline.in_code_block {
            continue;
        }

        let marker = Self::line_inner(underline, ctx.content);
        let fill = match marker.bytes().next() {
            Some(b @ (b'=' | b'-')) => b,
            _ => continue,
        };
        if !marker.bytes().all(|b| b == fill) {
            continue;
        }

        let level = Self::blockquote_level(underline);
        for above in (0..underline_idx).rev() {
            let candidate = &ctx.lines[above];
            let belongs = !candidate.is_blank
                && candidate.is_paragraph_context()
                && candidate.list_item.is_none()
                && Self::blockquote_level(candidate) == level;
            if !belongs {
                break;
            }
            is_heading_text[above] = true;
        }
    }

    is_heading_text
}
/// Returns the trimmed text of `line`, using the blockquote's inner content when
/// the line carries blockquote information and the raw line content otherwise.
fn line_inner<'a>(line: &'a crate::lint_context::LineInfo, source: &'a str) -> &'a str {
    if let Some(quote) = line.blockquote.as_ref() {
        return quote.content.trim();
    }
    line.content(source).trim()
}
/// Returns the blockquote nesting level of `line`, or `0` when it has no
/// blockquote information.
fn blockquote_level(line: &crate::lint_context::LineInfo) -> usize {
    match line.blockquote.as_ref() {
        Some(quote) => quote.nesting_level,
        None => 0,
    }
}
/// Assigns a paragraph id to every prose line, and `None` to every other line.
///
/// A line is prose when it is not blank, is paragraph context, is not setext
/// heading text, and is not inside a table block. A new id is allocated when no
/// paragraph is open, when the line starts a list item, or when the blockquote
/// nesting level differs from the previous line's level.
fn paragraph_ids(ctx: &LintContext) -> Vec<Option<usize>> {
    let heading_text = Self::setext_text_lines(ctx);
    let mut assigned: Vec<Option<usize>> = Vec::with_capacity(ctx.lines.len());
    let mut open_paragraph: Option<usize> = None;
    let mut allocated = 0usize;
    let mut last_level = 0usize;

    for (idx, line) in ctx.lines.iter().enumerate() {
        let level = Self::blockquote_level(line);
        // NOTE(review): `is_in_table_block` is given `idx + 1`, presumably a
        // 1-based line number; confirm against its definition.
        let excluded = line.is_blank
            || !line.is_paragraph_context()
            || heading_text[idx]
            || ctx.is_in_table_block(idx + 1);

        if excluded {
            open_paragraph = None;
        } else if open_paragraph.is_none() || line.list_item.is_some() || level != last_level {
            open_paragraph = Some(allocated);
            allocated += 1;
        }

        assigned.push(open_paragraph);
        last_level = level;
    }

    assigned
}
fn emit_run(&self, ctx: &LintContext, run: &[CountedSpan], limit: usize, warnings: &mut Vec<LintWarning>) {
if run.len() > limit
&& let Some(first) = run.first()
{
warnings.push(self.warn_at(
ctx,
first,
format!(
"{} consecutive emphasis spans (limit {limit}); consider rephrasing to reduce emphasis",
run.len(),
),
));
}
}
/// Builds a warning with `message`, positioned on `span`.
///
/// The span's line is looked up in `ctx.lines` (treating `span.line` as 1-based)
/// to translate the span's byte offsets into a line-relative range via
/// `calculate_match_range`. When the line cannot be resolved, an empty line
/// starting at offset 0 is assumed. The warning has `Severity::Warning` and
/// carries no fix.
fn warn_at(&self, ctx: &LintContext, span: &CountedSpan, message: String) -> LintWarning {
    // `checked_sub` keeps a line number of 0 on the same fallback path as an
    // out-of-range line, instead of overflowing the subtraction.
    let line_info = span.line.checked_sub(1).and_then(|idx| ctx.lines.get(idx));
    let line_content = line_info.map_or("", |l| l.content(ctx.content));
    let line_start = line_info.map_or(0, |l| l.byte_offset);

    let match_start_in_line = span.start.saturating_sub(line_start);
    // Saturate so that an inverted span yields a zero-length match, not an overflow.
    let match_len = span.end.saturating_sub(span.start);

    let (start_line, start_col, end_line, end_col) =
        calculate_match_range(span.line, line_content, match_start_in_line, match_len);

    LintWarning {
        rule_name: Some(self.name().to_string()),
        severity: Severity::Warning,
        line: start_line,
        column: start_col,
        end_line,
        end_column: end_col,
        message,
        fix: None,
    }
}
}
impl Rule for MD081NoExcessiveEmphasis {
fn name(&self) -> &'static str {
"MD081"
}
/// Returns the one-line description of the rule.
fn description(&self) -> &'static str {
"Inline emphasis should not be excessive"
}
/// Reports the rule under `RuleCategory::Emphasis`.
fn category(&self) -> RuleCategory {
RuleCategory::Emphasis
}
/// Runs the configured emphasis checks and returns their warnings.
///
/// Returns no warnings when both limits are unset or when no span is counted.
/// Otherwise:
///
/// * `max_per_paragraph`: every paragraph holding more counted spans than the
///   limit yields one warning on its first span, ordered by position.
/// * `max_consecutive`: counted spans of the same paragraph with no alphanumeric
///   character between them form a run; every run longer than the limit yields
///   one warning on its first span.
///
/// Per-paragraph warnings come first, followed by consecutive-run warnings.
/// Spans on lines without a paragraph id (for example setext heading text or
/// table rows) are ignored by both checks.
fn check(&self, ctx: &LintContext) -> LintResult {
    let per_paragraph_limit = self.config.max_per_paragraph;
    let consecutive_limit = self.config.max_consecutive;
    if per_paragraph_limit.is_none() && consecutive_limit.is_none() {
        return Ok(Vec::new());
    }

    let spans = self.counted_spans(ctx);
    if spans.is_empty() {
        return Ok(Vec::new());
    }

    // Resolve each span's paragraph once; both checks below share the result.
    let para_ids = Self::paragraph_ids(ctx);
    let span_paragraphs: Vec<Option<usize>> = spans
        .iter()
        .map(|span| {
            span.line
                .checked_sub(1)
                .and_then(|idx| para_ids.get(idx).copied().flatten())
        })
        .collect();

    let mut warnings = Vec::new();

    if let Some(limit) = per_paragraph_limit {
        // Paragraph id -> (span count, first span seen). Spans are ordered by start
        // offset, so the first one inserted is the earliest in the paragraph.
        let mut counts: std::collections::HashMap<usize, (usize, CountedSpan)> = std::collections::HashMap::new();
        for (span, pid) in spans.iter().zip(&span_paragraphs) {
            let Some(pid) = *pid else {
                continue;
            };
            counts.entry(pid).and_modify(|(n, _)| *n += 1).or_insert((1, *span));
        }

        let mut flagged: Vec<(usize, CountedSpan)> =
            counts.into_values().filter(|(n, _)| *n > limit).collect();
        // Map iteration order is unspecified, so report in document order.
        flagged.sort_by_key(|(_, first)| (first.line, first.start));

        for (count, first) in flagged {
            warnings.push(self.warn_at(
                ctx,
                &first,
                format!(
                    "Paragraph contains {count} emphasis spans (limit {limit}); consider reducing emphasis to improve readability"
                ),
            ));
        }
    }

    if let Some(limit) = consecutive_limit {
        let mut run_start = 0usize;
        // `i == spans.len()` is a sentinel step that flushes the final run.
        for i in 1..=spans.len() {
            let continues_run = i < spans.len() && {
                let (prev, cur) = (&spans[i - 1], &spans[i]);
                let same_para = span_paragraphs[i].is_some() && span_paragraphs[i - 1] == span_paragraphs[i];
                // An empty or unresolvable gap counts as filler.
                let between = ctx.content.get(prev.end..cur.start).unwrap_or("");
                same_para && !between.chars().any(char::is_alphanumeric)
            };
            if continues_run {
                continue;
            }
            // A run never mixes paragraphs, so its first span decides whether it is
            // prose. Runs outside any paragraph are skipped, matching the
            // per-paragraph check; otherwise a limit of 0 would flag heading text
            // and table cells.
            if span_paragraphs[run_start].is_some() {
                self.emit_run(ctx, &spans[run_start..i], limit, &mut warnings);
            }
            run_start = i;
        }
    }

    Ok(warnings)
}
/// Declares that this rule offers no automatic fix.
fn fix_capability(&self) -> FixCapability {
FixCapability::Unfixable
}
/// Returns the document content unchanged, as an owned string.
fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
    let unchanged = String::from(ctx.content);
    Ok(unchanged)
}
/// Returns `self` as a `&dyn Any`.
fn as_any(&self) -> &dyn std::any::Any {
self
}
/// Returns the rule name paired with the schema table of the default
/// configuration, or `None` when no table is produced or the table is empty.
fn default_config_section(&self) -> Option<(String, toml::Value)> {
    let table = crate::rule_config_serde::config_schema_table(&MD081Config::default())?;
    let has_entries = !table.is_empty();
    has_entries.then(|| (MD081Config::RULE_NAME.to_string(), toml::Value::Table(table)))
}
/// Builds the boxed rule from the MD081 section of the global configuration.
fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
where
    Self: Sized,
{
    let rule = Self::from_config_struct(crate::rule_config_serde::load_rule_config::<MD081Config>(config));
    Box::new(rule)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::rule::LintWarning;

    /// Lints `content` with a rule built directly from `config`.
    fn check(content: &str, config: MD081Config) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        MD081NoExcessiveEmphasis::from_config_struct(config)
            .check(&ctx)
            .unwrap()
    }

    /// Default configuration with only `max_per_paragraph` set.
    fn per_paragraph(limit: usize) -> MD081Config {
        MD081Config {
            max_per_paragraph: Some(limit),
            ..Default::default()
        }
    }

    /// Default configuration with only `max_consecutive` set.
    fn consecutive(limit: usize) -> MD081Config {
        MD081Config {
            max_consecutive: Some(limit),
            ..Default::default()
        }
    }

    /// Lints `content` with a rule loaded through `from_config`, using `values`
    /// as the raw MD081 rule settings.
    fn check_via_config(content: &str, values: Vec<(&str, toml::Value)>) -> Vec<LintWarning> {
        let mut rule_config = crate::config::RuleConfig::default();
        for (key, value) in values {
            rule_config.values.insert(key.to_string(), value);
        }
        let mut config = crate::config::Config::default();
        config.rules.insert("MD081".to_string(), rule_config);
        let rule = MD081NoExcessiveEmphasis::from_config(&config);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn flags_paragraph_over_max_per_paragraph() {
        let warnings = check("The **a** is **b** and **c** plus **d**.", per_paragraph(3));
        assert_eq!(warnings.len(), 1, "4 bold spans should exceed max-per-paragraph=3");
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn flags_consecutive_run_separated_only_by_punctuation() {
        let warnings = check("Tags: **one**, **two**, **three**.", consecutive(2));
        assert_eq!(
            warnings.len(),
            1,
            "run of 3 adjacent bolds should exceed max-consecutive=2"
        );
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn unicode_punctuation_does_not_break_consecutive_run() {
        let warnings = check(
            "Tags: **one** \u{2014} **two** \u{2014} **three**.",
            consecutive(2),
        );
        assert_eq!(
            warnings.len(),
            1,
            "em-dash-separated bolds form one run of 3, exceeding max-consecutive=2. Got: {warnings:?}"
        );
    }

    #[test]
    fn connector_word_breaks_consecutive_run() {
        let warnings = check("Tags: **one**, **two**, and **three**.", consecutive(2));
        assert!(
            warnings.is_empty(),
            "a connector word should break the run below the limit. Got: {warnings:?}"
        );
    }

    #[test]
    fn disabled_by_default() {
        let warnings = check(
            "**a** **b** **c** **d** **e** **f** **g** **h**.",
            MD081Config::default(),
        );
        assert!(warnings.is_empty(), "rule must be off by default. Got: {warnings:?}");
    }

    #[test]
    fn does_not_flag_setext_heading_text() {
        let config = MD081Config {
            max_consecutive: Some(1),
            ..per_paragraph(2)
        };
        let warnings = check("**A** **B** **C**\n=================\n", config);
        assert!(
            warnings.is_empty(),
            "emphasis in setext heading text must not be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn flags_list_item_before_thematic_break() {
        let warnings = check("- **a** and **b**\n---\n", per_paragraph(1));
        assert_eq!(
            warnings.len(),
            1,
            "list item with 2 bolds before a thematic break should be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn parses_kebab_case_keys_and_lowercase_targets_from_config() {
        let warnings = check_via_config(
            "This is **bold** and *italic*.",
            vec![
                ("max-per-paragraph", toml::Value::Integer(1)),
                ("targets", toml::Value::String("all".to_string())),
            ],
        );
        assert_eq!(
            warnings.len(),
            1,
            "kebab-case max-per-paragraph and targets=\"all\" must parse from config. Got: {warnings:?}"
        );
    }

    #[test]
    fn does_not_flag_setext_heading_inside_blockquote() {
        let warnings = check("> **A** **B**\n> ===\n", per_paragraph(1));
        assert!(
            warnings.is_empty(),
            "emphasis in a blockquoted setext heading must not be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn flags_blockquote_paragraph_before_top_level_break() {
        let warnings = check("> **a** and **b**\n---\n", per_paragraph(1));
        assert_eq!(
            warnings.len(),
            1,
            "blockquote paragraph with 2 bolds before a top-level break should be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn does_not_flag_emphasis_in_table_rows() {
        let warnings = check(
            "| Col A | Col B |\n| ----- | ----- |\n| **a** | **b** |\n",
            per_paragraph(1),
        );
        assert!(
            warnings.is_empty(),
            "emphasis in table cells must not be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn does_not_flag_at_or_below_limit() {
        assert!(
            check("The **a** is **b** and **c**.", per_paragraph(3)).is_empty(),
            "3 spans must not exceed limit 3"
        );
    }

    #[test]
    fn excludes_code_blocks_and_inline_code() {
        let warnings = check(
            "```python\nfoo(**a**, **b**, **c**, **d**)\n```\n\nText with `**x** **y** **z**` only.",
            per_paragraph(1),
        );
        assert!(
            warnings.is_empty(),
            "emphasis inside code must be ignored. Got: {warnings:?}"
        );
    }

    #[test]
    fn counts_paragraphs_independently() {
        assert!(
            check(
                "First **a** and **b** here.\n\nSecond **c** and **d** here.",
                per_paragraph(2)
            )
            .is_empty(),
            "spans must not aggregate across the blank-line paragraph boundary"
        );
    }

    #[test]
    fn counts_list_items_independently() {
        assert!(
            check("- item **a** and **b**\n- item **c** and **d**", per_paragraph(2)).is_empty(),
            "each list item is its own paragraph and must be counted independently"
        );
    }

    #[test]
    fn targets_strong_ignores_italic() {
        let config = MD081Config {
            targets: EmphasisTarget::Strong,
            ..per_paragraph(1)
        };
        assert!(
            check("Here is *a* and *b* and *c* and *d* with one **bold**.", config).is_empty(),
            "targets=strong must ignore italic spans"
        );
    }

    #[test]
    fn targets_emphasis_counts_italic_only() {
        let config = MD081Config {
            targets: EmphasisTarget::Emphasis,
            ..per_paragraph(2)
        };
        let warnings = check("Lots of *a* and *b* and *c* italics, plus **bold**.", config);
        assert_eq!(warnings.len(), 1, "3 italics exceed limit 2 under targets=emphasis");
    }

    #[test]
    fn targets_all_dedups_combined_bold_italic() {
        let config = MD081Config {
            targets: EmphasisTarget::All,
            ..per_paragraph(1)
        };
        assert!(
            check("Just ***one region*** here.", config).is_empty(),
            "combined ***...*** must count once under targets=all"
        );
    }

    #[test]
    fn targets_all_counts_distinct_regions() {
        let config = MD081Config {
            targets: EmphasisTarget::All,
            ..per_paragraph(1)
        };
        let warnings = check("Mix ***a*** and **b** here.", config);
        assert_eq!(warnings.len(), 1, "two distinct emphasis regions exceed limit 1");
    }

    #[test]
    fn max_per_paragraph_zero_forbids_all_emphasis() {
        let warnings = check("A paragraph with one **bold** word.", per_paragraph(0));
        assert_eq!(
            warnings.len(),
            1,
            "max-per-paragraph=0 must flag even a single emphasis span. Got: {warnings:?}"
        );
    }

    #[test]
    fn max_consecutive_zero_forbids_all_emphasis() {
        let warnings = check("A paragraph with one **bold** word.", consecutive(0));
        assert_eq!(
            warnings.len(),
            1,
            "max-consecutive=0 must flag even a single emphasis span. Got: {warnings:?}"
        );
    }

    #[test]
    fn explicit_zero_in_toml_parses_as_forbid_all() {
        let warnings = check_via_config(
            "One **bold** here.",
            vec![("max-per-paragraph", toml::Value::Integer(0))],
        );
        assert_eq!(
            warnings.len(),
            1,
            "explicit max-per-paragraph = 0 must forbid all emphasis. Got: {warnings:?}"
        );
    }
}