1use crate::lint_context::LintContext;
20use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
21use crate::rule_config_serde::RuleConfig;
22use crate::utils::range_utils::calculate_match_range;
23use crate::utils::skip_context::{compute_html_code_ranges, should_skip_emphasis_span};
24use serde::{Deserialize, Serialize};
25
/// An emphasis span that counts toward the rule's limits.
///
/// `start` and `end` are the byte offsets copied from the context's emphasis
/// span (`byte_offset` / `byte_end`); `line` is the span's line number, which
/// the rule uses as `line - 1` when indexing the context's line table.
#[derive(Clone, Copy, Debug)]
struct CountedSpan {
    /// Byte offset at which the span starts.
    start: usize,
    /// Byte offset at which the span ends.
    end: usize,
    /// Line number of the span (used 1-based by this rule).
    line: usize,
}
33
34#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
36#[serde(rename_all = "lowercase")]
37pub enum EmphasisTarget {
38 #[default]
40 Strong,
41 Emphasis,
43 All,
45}
46
47#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
49#[serde(rename_all = "kebab-case")]
50pub struct MD081Config {
51 #[serde(default)]
54 pub targets: EmphasisTarget,
55
56 #[serde(default)]
59 pub max_per_paragraph: usize,
60
61 #[serde(default)]
65 pub max_consecutive: usize,
66}
67
68impl Default for MD081Config {
69 fn default() -> Self {
70 Self {
71 targets: EmphasisTarget::Strong,
72 max_per_paragraph: 0,
73 max_consecutive: 0,
74 }
75 }
76}
77
78impl RuleConfig for MD081Config {
79 const RULE_NAME: &'static str = "MD081";
80}
81
82#[derive(Debug, Clone, Default)]
83pub struct MD081NoExcessiveEmphasis {
84 config: MD081Config,
85}
86
87impl MD081NoExcessiveEmphasis {
88 pub fn new() -> Self {
89 Self::default()
90 }
91
92 pub fn from_config_struct(config: MD081Config) -> Self {
93 Self { config }
94 }
95
96 fn counted_spans(&self, ctx: &LintContext) -> Vec<CountedSpan> {
101 let html_tags = ctx.html_tags();
102 let html_code_ranges = compute_html_code_ranges(&html_tags);
103
104 let mut spans: Vec<CountedSpan> = ctx
105 .emphasis_spans()
106 .iter()
107 .filter(|s| match self.config.targets {
108 EmphasisTarget::Strong => s.is_strong,
109 EmphasisTarget::Emphasis => !s.is_strong,
110 EmphasisTarget::All => true,
111 })
112 .filter(|s| !should_skip_emphasis_span(ctx, &html_tags, &html_code_ranges, s.byte_offset))
113 .map(|s| CountedSpan {
114 start: s.byte_offset,
115 end: s.byte_end,
116 line: s.line,
117 })
118 .collect();
119
120 spans.sort_by_key(|s| (s.start, std::cmp::Reverse(s.end)));
121
122 if self.config.targets == EmphasisTarget::All {
123 let mut deduped: Vec<CountedSpan> = Vec::with_capacity(spans.len());
127 let mut max_end = 0usize;
128 for span in spans {
129 if span.end <= max_end {
130 continue;
131 }
132 max_end = span.end;
133 deduped.push(span);
134 }
135 deduped
136 } else {
137 spans
138 }
139 }
140
141 fn setext_text_lines(ctx: &LintContext) -> Vec<bool> {
147 let mut flags = vec![false; ctx.lines.len()];
148 for (idx, line) in ctx.lines.iter().enumerate() {
149 if idx == 0 || line.in_code_block {
150 continue;
151 }
152 let text = Self::line_inner(line, ctx.content);
153 let is_underline = !text.is_empty() && (text.bytes().all(|b| b == b'=') || text.bytes().all(|b| b == b'-'));
154 if !is_underline {
155 continue;
156 }
157 let level = Self::blockquote_level(line);
158 let mut j = idx;
164 while j > 0 {
165 let prev = &ctx.lines[j - 1];
166 if prev.is_blank
167 || !prev.is_paragraph_context()
168 || prev.list_item.is_some()
169 || Self::blockquote_level(prev) != level
170 {
171 break;
172 }
173 flags[j - 1] = true;
174 j -= 1;
175 }
176 }
177 flags
178 }
179
180 fn line_inner<'a>(line: &'a crate::lint_context::LineInfo, source: &'a str) -> &'a str {
182 match line.blockquote.as_ref() {
183 Some(bq) => bq.content.trim(),
184 None => line.content(source).trim(),
185 }
186 }
187
188 fn blockquote_level(line: &crate::lint_context::LineInfo) -> usize {
190 line.blockquote.as_ref().map_or(0, |b| b.nesting_level)
191 }
192
193 fn paragraph_ids(ctx: &LintContext) -> Vec<Option<usize>> {
199 let mut ids = vec![None; ctx.lines.len()];
200 let setext_text = Self::setext_text_lines(ctx);
201 let mut current: Option<usize> = None;
202 let mut next_id = 0usize;
203 let mut prev_bq_level = 0usize;
204
205 for (idx, line) in ctx.lines.iter().enumerate() {
206 let bq_level = Self::blockquote_level(line);
207 let is_prose =
208 !line.is_blank && line.is_paragraph_context() && !setext_text[idx] && !ctx.is_in_table_block(idx + 1);
209
210 if !is_prose {
211 current = None;
212 prev_bq_level = bq_level;
213 continue;
214 }
215
216 let starts_new = current.is_none() || line.list_item.is_some() || bq_level != prev_bq_level;
217 if starts_new {
218 current = Some(next_id);
219 next_id += 1;
220 }
221 ids[idx] = current;
222 prev_bq_level = bq_level;
223 }
224
225 ids
226 }
227
228 fn emit_run(&self, ctx: &LintContext, run: &[CountedSpan], warnings: &mut Vec<LintWarning>) {
231 if run.len() > self.config.max_consecutive
232 && let Some(first) = run.first()
233 {
234 warnings.push(self.warn_at(
235 ctx,
236 first,
237 format!(
238 "{} consecutive emphasis spans (limit {}); consider rephrasing to reduce emphasis",
239 run.len(),
240 self.config.max_consecutive
241 ),
242 ));
243 }
244 }
245
246 fn warn_at(&self, ctx: &LintContext, span: &CountedSpan, message: String) -> LintWarning {
247 let line_content = ctx.lines.get(span.line - 1).map_or("", |l| l.content(ctx.content));
248 let line_start = ctx.lines.get(span.line - 1).map_or(0, |l| l.byte_offset);
249 let match_start_in_line = span.start.saturating_sub(line_start);
250 let (start_line, start_col, end_line, end_col) =
251 calculate_match_range(span.line, line_content, match_start_in_line, span.end - span.start);
252 LintWarning {
253 rule_name: Some(self.name().to_string()),
254 severity: Severity::Warning,
255 line: start_line,
256 column: start_col,
257 end_line,
258 end_column: end_col,
259 message,
260 fix: None,
261 }
262 }
263}
264
265impl Rule for MD081NoExcessiveEmphasis {
266 fn name(&self) -> &'static str {
267 "MD081"
268 }
269
270 fn description(&self) -> &'static str {
271 "Inline emphasis should not be excessive"
272 }
273
274 fn category(&self) -> RuleCategory {
275 RuleCategory::Emphasis
276 }
277
278 fn check(&self, ctx: &LintContext) -> LintResult {
279 if self.config.max_per_paragraph == 0 && self.config.max_consecutive == 0 {
280 return Ok(Vec::new());
281 }
282
283 let spans = self.counted_spans(ctx);
284 if spans.is_empty() {
285 return Ok(Vec::new());
286 }
287
288 let para_ids = Self::paragraph_ids(ctx);
289 let mut warnings = Vec::new();
290
291 if self.config.max_per_paragraph > 0 {
292 let mut counts: std::collections::HashMap<usize, (usize, CountedSpan)> = std::collections::HashMap::new();
296 for span in &spans {
297 let Some(pid) = para_ids.get(span.line - 1).copied().flatten() else {
298 continue;
299 };
300 counts.entry(pid).and_modify(|(n, _)| *n += 1).or_insert((1, *span));
301 }
302 let mut flagged: Vec<(usize, CountedSpan)> = counts
303 .into_iter()
304 .filter(|(_, (n, _))| *n > self.config.max_per_paragraph)
305 .map(|(_, (n, first))| (n, first))
306 .collect();
307 flagged.sort_by_key(|(_, first)| (first.line, first.start));
308 for (count, first) in flagged {
309 warnings.push(self.warn_at(
310 ctx,
311 &first,
312 format!(
313 "Paragraph contains {count} emphasis spans (limit {}); consider reducing emphasis to improve readability",
314 self.config.max_per_paragraph
315 ),
316 ));
317 }
318 }
319
320 if self.config.max_consecutive > 0 {
321 let mut run_start = 0usize; for i in 0..spans.len() {
326 let breaks = if i == 0 {
327 true
328 } else {
329 let prev = &spans[i - 1];
330 let cur = &spans[i];
331 let same_para = para_ids.get(prev.line - 1).copied().flatten()
332 == para_ids.get(cur.line - 1).copied().flatten()
333 && para_ids.get(cur.line - 1).copied().flatten().is_some();
334 let between = ctx.content.get(prev.end..cur.start).unwrap_or("");
335 let only_filler = !between.chars().any(char::is_alphanumeric);
339 !(same_para && only_filler)
340 };
341
342 if breaks && i > run_start {
343 self.emit_run(ctx, &spans[run_start..i], &mut warnings);
344 }
345 if breaks {
346 run_start = i;
347 }
348 }
349 if !spans.is_empty() {
350 self.emit_run(ctx, &spans[run_start..], &mut warnings);
351 }
352 }
353
354 Ok(warnings)
355 }
356
357 fn fix_capability(&self) -> FixCapability {
358 FixCapability::Unfixable
359 }
360
361 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
362 Ok(ctx.content.to_string())
365 }
366
367 fn as_any(&self) -> &dyn std::any::Any {
368 self
369 }
370
371 fn default_config_section(&self) -> Option<(String, toml::Value)> {
372 let table = crate::rule_config_serde::config_schema_table(&MD081Config::default())?;
373 if table.is_empty() {
374 None
375 } else {
376 Some((MD081Config::RULE_NAME.to_string(), toml::Value::Table(table)))
377 }
378 }
379
380 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
381 where
382 Self: Sized,
383 {
384 let rule_config = crate::rule_config_serde::load_rule_config::<MD081Config>(config);
385 Box::new(Self::from_config_struct(rule_config))
386 }
387}
388
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::rule::LintWarning;

    /// Lints `content` with a rule built from `config` and returns its warnings.
    fn check(content: &str, config: MD081Config) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        MD081NoExcessiveEmphasis::from_config_struct(config)
            .check(&ctx)
            .unwrap()
    }

    /// Default configuration with only `max_per_paragraph` set.
    fn per_paragraph(limit: usize) -> MD081Config {
        MD081Config {
            max_per_paragraph: limit,
            ..MD081Config::default()
        }
    }

    /// Default configuration with only `max_consecutive` set.
    fn consecutive(limit: usize) -> MD081Config {
        MD081Config {
            max_consecutive: limit,
            ..MD081Config::default()
        }
    }

    /// `per_paragraph(limit)` with the counted emphasis kinds overridden.
    fn per_paragraph_for(targets: EmphasisTarget, limit: usize) -> MD081Config {
        MD081Config {
            targets,
            ..per_paragraph(limit)
        }
    }

    #[test]
    fn flags_paragraph_over_max_per_paragraph() {
        let warnings = check("The **a** is **b** and **c** plus **d**.", per_paragraph(3));
        assert_eq!(warnings.len(), 1, "4 bold spans should exceed max-per-paragraph=3");
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn flags_consecutive_run_separated_only_by_punctuation() {
        let warnings = check("Tags: **one**, **two**, **three**.", consecutive(2));
        assert_eq!(
            warnings.len(),
            1,
            "run of 3 adjacent bolds should exceed max-consecutive=2"
        );
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn unicode_punctuation_does_not_break_consecutive_run() {
        let warnings = check(
            "Tags: **one** \u{2014} **two** \u{2014} **three**.",
            consecutive(2),
        );
        assert_eq!(
            warnings.len(),
            1,
            "em-dash-separated bolds form one run of 3, exceeding max-consecutive=2. Got: {warnings:?}"
        );
    }

    #[test]
    fn connector_word_breaks_consecutive_run() {
        let warnings = check("Tags: **one**, **two**, and **three**.", consecutive(2));
        assert!(
            warnings.is_empty(),
            "a connector word should break the run below the limit. Got: {warnings:?}"
        );
    }

    #[test]
    fn disabled_by_default() {
        let warnings = check(
            "**a** **b** **c** **d** **e** **f** **g** **h**.",
            MD081Config::default(),
        );
        assert!(warnings.is_empty(), "rule must be off by default. Got: {warnings:?}");
    }

    #[test]
    fn does_not_flag_setext_heading_text() {
        let both_limits = MD081Config {
            max_consecutive: 1,
            ..per_paragraph(2)
        };
        let warnings = check("**A** **B** **C**\n=================\n", both_limits);
        assert!(
            warnings.is_empty(),
            "emphasis in setext heading text must not be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn flags_list_item_before_thematic_break() {
        let warnings = check("- **a** and **b**\n---\n", per_paragraph(1));
        assert_eq!(
            warnings.len(),
            1,
            "list item with 2 bolds before a thematic break should be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn parses_kebab_case_keys_and_lowercase_targets_from_config() {
        let mut md081 = crate::config::RuleConfig::default();
        md081
            .values
            .insert("max-per-paragraph".to_string(), toml::Value::Integer(1));
        md081
            .values
            .insert("targets".to_string(), toml::Value::String("all".to_string()));

        let mut global = crate::config::Config::default();
        global.rules.insert("MD081".to_string(), md081);

        let rule = MD081NoExcessiveEmphasis::from_config(&global);
        let ctx = LintContext::new("This is **bold** and *italic*.", MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(
            warnings.len(),
            1,
            "kebab-case max-per-paragraph and targets=\"all\" must parse from config. Got: {warnings:?}"
        );
    }

    #[test]
    fn does_not_flag_setext_heading_inside_blockquote() {
        let warnings = check("> **A** **B**\n> ===\n", per_paragraph(1));
        assert!(
            warnings.is_empty(),
            "emphasis in a blockquoted setext heading must not be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn flags_blockquote_paragraph_before_top_level_break() {
        let warnings = check("> **a** and **b**\n---\n", per_paragraph(1));
        assert_eq!(
            warnings.len(),
            1,
            "blockquote paragraph with 2 bolds before a top-level break should be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn does_not_flag_emphasis_in_table_rows() {
        let warnings = check(
            "| Col A | Col B |\n| ----- | ----- |\n| **a** | **b** |\n",
            per_paragraph(1),
        );
        assert!(
            warnings.is_empty(),
            "emphasis in table cells must not be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn does_not_flag_at_or_below_limit() {
        let found = check("The **a** is **b** and **c**.", per_paragraph(3));
        assert!(found.is_empty(), "3 spans must not exceed limit 3");
    }

    #[test]
    fn excludes_code_blocks_and_inline_code() {
        let warnings = check(
            "```python\nfoo(**a**, **b**, **c**, **d**)\n```\n\nText with `**x** **y** **z**` only.",
            per_paragraph(1),
        );
        assert!(
            warnings.is_empty(),
            "emphasis inside code must be ignored. Got: {warnings:?}"
        );
    }

    #[test]
    fn counts_paragraphs_independently() {
        let found = check(
            "First **a** and **b** here.\n\nSecond **c** and **d** here.",
            per_paragraph(2),
        );
        assert!(
            found.is_empty(),
            "spans must not aggregate across the blank-line paragraph boundary"
        );
    }

    #[test]
    fn counts_list_items_independently() {
        let found = check("- item **a** and **b**\n- item **c** and **d**", per_paragraph(2));
        assert!(
            found.is_empty(),
            "each list item is its own paragraph and must be counted independently"
        );
    }

    #[test]
    fn targets_strong_ignores_italic() {
        let found = check(
            "Here is *a* and *b* and *c* and *d* with one **bold**.",
            per_paragraph_for(EmphasisTarget::Strong, 1),
        );
        assert!(found.is_empty(), "targets=strong must ignore italic spans");
    }

    #[test]
    fn targets_emphasis_counts_italic_only() {
        let found = check(
            "Lots of *a* and *b* and *c* italics, plus **bold**.",
            per_paragraph_for(EmphasisTarget::Emphasis, 2),
        );
        assert_eq!(found.len(), 1, "3 italics exceed limit 2 under targets=emphasis");
    }

    #[test]
    fn targets_all_dedups_combined_bold_italic() {
        let found = check(
            "Just ***one region*** here.",
            per_paragraph_for(EmphasisTarget::All, 1),
        );
        assert!(found.is_empty(), "combined ***...*** must count once under targets=all");
    }

    #[test]
    fn targets_all_counts_distinct_regions() {
        let found = check(
            "Mix ***a*** and **b** here.",
            per_paragraph_for(EmphasisTarget::All, 1),
        );
        assert_eq!(found.len(), 1, "two distinct emphasis regions exceed limit 1");
    }
}