rumdl_lib/rules/md013_line_length.rs
/// Rule MD013: Line length
///
/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod block_builder;
22mod helpers;
23pub mod md013_config;
24use crate::utils::is_template_directive_only;
25use block_builder::{Block, BlockBuilder};
26use helpers::{
27 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_html_only_line,
28 is_list_item, is_standalone_link_or_image_line, is_unwrappable_line, split_into_segments,
29 trim_preserving_hard_break,
30};
31pub use md013_config::MD013Config;
32use md013_config::{LengthMode, ReflowMode};
33
34#[cfg(test)]
35mod tests;
36use unicode_width::UnicodeWidthStr;
37
/// Lint rule MD013 ("Line length should not be excessive").
///
/// The rule itself is a thin wrapper around its configuration; all checking
/// and reflow behaviour is driven by the values stored in `config`.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings for this rule instance (length limits, per-element toggles, reflow options).
    pub(crate) config: MD013Config,
}
42
/// Blockquote paragraph line collected for reflow, with original line index for range computation.
struct CollectedBlockquoteLine {
    /// Zero-based index of this line within the document's raw line slice.
    line_idx: usize,
    /// Line content (and prefix information) handed to the blockquote reflow helpers.
    data: BlockquoteLineData,
}
48
49impl MD013LineLength {
50 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
51 Self {
52 config: MD013Config {
53 line_length: crate::types::LineLength::new(line_length),
54 code_blocks,
55 tables,
56 headings,
57 paragraphs: true, // Default to true for backwards compatibility
58 blockquotes: true, // Default to true for backwards compatibility
59 strict,
60 stern: false,
61 heading_line_length: None,
62 code_block_line_length: None,
63 reflow: false,
64 reflow_mode: ReflowMode::default(),
65 length_mode: LengthMode::default(),
66 abbreviations: Vec::new(),
67 require_sentence_capital: true,
68 },
69 }
70 }
71
72 pub fn from_config_struct(config: MD013Config) -> Self {
73 Self { config }
74 }
75
76 /// Return a clone with code block checking disabled.
77 /// Used for doc comment linting where code blocks are Rust code managed by rustfmt.
78 pub fn with_code_blocks_disabled(&self) -> Self {
79 let mut clone = self.clone();
80 clone.config.code_blocks = false;
81 clone
82 }
83
84 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
85 fn reflow_length_mode(&self) -> ReflowLengthMode {
86 match self.config.length_mode {
87 LengthMode::Chars => ReflowLengthMode::Chars,
88 LengthMode::Visual => ReflowLengthMode::Visual,
89 LengthMode::Bytes => ReflowLengthMode::Bytes,
90 }
91 }
92
93 fn should_ignore_line(
94 &self,
95 line: &str,
96 _lines: &[&str],
97 current_line: usize,
98 ctx: &crate::lint_context::LintContext,
99 ) -> bool {
100 if self.config.strict {
101 return false;
102 }
103
104 // Quick check for common patterns before expensive regex
105 let trimmed = line.trim();
106
107 // Only skip if the entire line is a URL (quick check first)
108 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
109 return true;
110 }
111
112 // Only skip if the entire line is an image reference (quick check first)
113 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
114 return true;
115 }
116
117 // Note: link reference definitions are handled as always-exempt (even in strict mode)
118 // in the main check loop, so they don't need to be checked here.
119
120 // Code blocks with long strings (only check if in code block)
121 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
122 && !trimmed.is_empty()
123 && !line.contains(' ')
124 && !line.contains('\t')
125 {
126 return true;
127 }
128
129 false
130 }
131
132 /// Check if rule should skip based on provided config (used for inline config support)
133 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
134 // Skip if content is empty
135 if ctx.content.is_empty() {
136 return true;
137 }
138
139 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
140 if config.reflow
141 && (config.reflow_mode == ReflowMode::SentencePerLine
142 || config.reflow_mode == ReflowMode::SemanticLineBreaks
143 || config.reflow_mode == ReflowMode::Normalize)
144 {
145 return false;
146 }
147
148 // Use the smallest applicable budget across line/heading/code-block
149 // contexts so a stricter context-specific limit doesn't get masked by
150 // the document-wide budget.
151 let min_limit = config.min_effective_line_length();
152 if min_limit.is_unlimited() {
153 return true;
154 }
155 let min_limit_bytes = min_limit.get();
156
157 // Quick check: if total content is shorter than the smallest line limit,
158 // definitely skip.
159 if ctx.content.len() <= min_limit_bytes {
160 return true;
161 }
162
163 // Skip if no line exceeds the smallest applicable limit.
164 !ctx.lines.iter().any(|line| line.byte_len > min_limit_bytes)
165 }
166
167 fn normalize_mode_needs_reflow<'a, I>(&self, lines: I, config: &MD013Config) -> bool
168 where
169 I: IntoIterator<Item = &'a str>,
170 {
171 let mut line_count = 0;
172 let check_length = !config.line_length.is_unlimited();
173
174 for line in lines {
175 line_count += 1;
176 if check_length && self.calculate_effective_length(line) > config.line_length.get() {
177 return true;
178 }
179 }
180
181 line_count > 1
182 }
183}
184
185impl Rule for MD013LineLength {
186 fn name(&self) -> &'static str {
187 "MD013"
188 }
189
190 fn description(&self) -> &'static str {
191 "Line length should not be excessive"
192 }
193
194 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
195 // Use pre-parsed inline config from LintContext
196 let config_override = ctx.inline_config().get_rule_config("MD013");
197
198 // Apply configuration override if present
199 let effective_config = if let Some(json_config) = config_override {
200 if let Some(obj) = json_config.as_object() {
201 let mut config = self.config.clone();
202 if let Some(line_length) = obj.get("line_length").and_then(serde_json::Value::as_u64) {
203 config.line_length = crate::types::LineLength::new(line_length as usize);
204 }
205 if let Some(code_blocks) = obj.get("code_blocks").and_then(serde_json::Value::as_bool) {
206 config.code_blocks = code_blocks;
207 }
208 if let Some(tables) = obj.get("tables").and_then(serde_json::Value::as_bool) {
209 config.tables = tables;
210 }
211 if let Some(headings) = obj.get("headings").and_then(serde_json::Value::as_bool) {
212 config.headings = headings;
213 }
214 if let Some(blockquotes) = obj.get("blockquotes").and_then(serde_json::Value::as_bool) {
215 config.blockquotes = blockquotes;
216 }
217 if let Some(strict) = obj.get("strict").and_then(serde_json::Value::as_bool) {
218 config.strict = strict;
219 }
220 if let Some(reflow) = obj.get("reflow").and_then(serde_json::Value::as_bool) {
221 config.reflow = reflow;
222 }
223 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
224 config.reflow_mode = match reflow_mode {
225 "default" => ReflowMode::Default,
226 "normalize" => ReflowMode::Normalize,
227 "sentence-per-line" => ReflowMode::SentencePerLine,
228 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
229 _ => ReflowMode::default(),
230 };
231 }
232 config
233 } else {
234 self.config.clone()
235 }
236 } else {
237 self.config.clone()
238 };
239
240 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
241 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
242 if self.should_skip_with_config(ctx, &effective_config)
243 && !(effective_config.reflow
244 && (effective_config.reflow_mode == ReflowMode::Normalize
245 || effective_config.reflow_mode == ReflowMode::SentencePerLine
246 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
247 {
248 return Ok(Vec::new());
249 }
250
251 // Direct implementation without DocumentStructure
252 let mut warnings = Vec::new();
253
254 // Special handling: line_length = 0 means "no line length limit"
255 // Skip all line length checks, but still allow reflow if enabled
256 let skip_length_checks = effective_config.line_length.is_unlimited();
257
258 // Pre-filter lines that could be problematic to avoid processing all lines.
259 // Use the smallest applicable budget across line/heading/code-block contexts
260 // so candidates aren't dropped when a stricter context-specific budget applies.
261 let prefilter_limit = effective_config.min_effective_line_length();
262 let prefilter_skip = prefilter_limit.is_unlimited();
263 let mut candidate_lines = Vec::new();
264 if !skip_length_checks && !prefilter_skip {
265 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
266 // Skip front matter - it should never be linted
267 if line_info.in_front_matter {
268 continue;
269 }
270
271 // Quick length check first
272 if line_info.byte_len > prefilter_limit.get() {
273 candidate_lines.push(line_idx);
274 }
275 }
276 }
277
278 // If no candidate lines and not in normalize or sentence-per-line mode, early return
279 if candidate_lines.is_empty()
280 && !(effective_config.reflow
281 && (effective_config.reflow_mode == ReflowMode::Normalize
282 || effective_config.reflow_mode == ReflowMode::SentencePerLine
283 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
284 {
285 return Ok(warnings);
286 }
287
288 let lines = ctx.raw_lines();
289
290 // Create a quick lookup set for heading lines
291 // We need this for both the heading skip check AND the paragraphs check
292 let heading_lines_set: std::collections::HashSet<usize> = ctx
293 .lines
294 .iter()
295 .enumerate()
296 .filter(|(_, line)| line.heading.is_some())
297 .map(|(idx, _)| idx + 1)
298 .collect();
299
300 // Use pre-computed table blocks from context
301 // We need this for both the table skip check AND the paragraphs check
302 let table_blocks = &ctx.table_blocks;
303 let mut table_lines_set = std::collections::HashSet::new();
304 for table in table_blocks {
305 table_lines_set.insert(table.header_line + 1);
306 table_lines_set.insert(table.delimiter_line + 1);
307 for &line in &table.content_lines {
308 table_lines_set.insert(line + 1);
309 }
310 }
311
312 // Process candidate lines for line length checks
313 'line_loop: for &line_idx in &candidate_lines {
314 let line_number = line_idx + 1;
315 let line = lines[line_idx];
316
317 // Calculate actual line length (used in warning messages)
318 let effective_length = self.calculate_effective_length(line);
319
320 // Pick the context-specific limit: heading > code-block > paragraph.
321 // Headings dominate over code-block context if a setext underline ever
322 // overlaps a fenced range (defensive — these are mutually exclusive in
323 // practice, but the explicit ordering documents intent).
324 let is_heading_line = heading_lines_set.contains(&line_number);
325 let in_code_block = ctx.line_info(line_number).is_some_and(|info| info.in_code_block);
326 let line_limit = if is_heading_line {
327 effective_config.effective_heading_line_length().get()
328 } else if in_code_block {
329 effective_config.effective_code_block_line_length().get()
330 } else {
331 effective_config.line_length.get()
332 };
333
334 // A context-specific limit of 0 means "unlimited for this context".
335 if line_limit == 0 {
336 continue;
337 }
338
339 // Stern mode: like default, but the trailing-token forgiveness is
340 // disabled — a line with whitespace that exceeds the limit is a
341 // violation even if the excess is the final token. The "unwrappable"
342 // line exemption (single token, optionally prefixed by # or >) is
343 // still honored. Strict overrides stern entirely.
344 if effective_config.stern && !effective_config.strict && is_unwrappable_line(line) {
345 continue;
346 }
347
348 // Trailing-token forgiveness: only in default mode (not strict, not stern).
349 // If the line only exceeds the limit because of a long token at the end
350 // (URL, link chain, identifier), it passes. This matches markdownlint's
351 // behavior: line.replace(/\S*$/u, "#")
352 let check_length = if effective_config.strict || effective_config.stern {
353 effective_length
354 } else {
355 match line.rfind(char::is_whitespace) {
356 Some(pos) => {
357 let ws_char = line[pos..].chars().next().unwrap();
358 let prefix_end = pos + ws_char.len_utf8();
359 self.calculate_string_length(&line[..prefix_end]) + 1
360 }
361 None => 1, // No whitespace — entire line is a single token
362 }
363 };
364
365 // Skip lines where the check length is within the limit
366 if check_length <= line_limit {
367 continue;
368 }
369
370 // Semantic link understanding: suppress when excess comes entirely from inline URLs
371 if !effective_config.strict {
372 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
373 if text_only_length <= line_limit {
374 continue;
375 }
376 }
377
378 // Skip mkdocstrings and pymdown blocks (already handled by LintContext)
379 if ctx.lines[line_idx].in_mkdocstrings || ctx.lines[line_idx].in_pymdown_block {
380 continue;
381 }
382
383 // Skip MyST comments (% comment) — structural lines, not prose
384 if ctx.lines[line_idx].is_myst_comment {
385 continue;
386 }
387
388 // Link reference definitions are always exempt, even in strict mode.
389 // There's no way to shorten them without breaking the URL.
390 // Also check after stripping list markers, since list items may
391 // contain link ref defs as their content.
392 {
393 let trimmed = line.trim();
394 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
395 continue;
396 }
397 if is_list_item(trimmed) {
398 let (_, content) = extract_list_marker_and_content(trimmed);
399 let content_trimmed = content.trim();
400 if content_trimmed.starts_with('[')
401 && content_trimmed.contains("]:")
402 && LINK_REF_PATTERN.is_match(content_trimmed)
403 {
404 continue;
405 }
406 }
407 }
408
409 // Skip various block types efficiently
410 if !effective_config.strict {
411 // Lines whose only content is a link/image are exempt.
412 // After stripping list markers, blockquote markers, and emphasis,
413 // if only a link or image remains, there is no way to shorten it.
414 if is_standalone_link_or_image_line(line) {
415 continue;
416 }
417
418 // Lines consisting entirely of HTML tags are exempt.
419 // Badge lines, images with attributes, and similar inline HTML
420 // are long due to URLs in attributes and can't be meaningfully shortened.
421 if is_html_only_line(line) {
422 continue;
423 }
424
425 // Skip setext heading underlines
426 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
427 continue;
428 }
429
430 // Skip block elements according to config flags
431 // The flags mean: true = check these elements, false = skip these elements
432 // So we skip when the flag is FALSE and the line is in that element type
433 if (!effective_config.headings && heading_lines_set.contains(&line_number))
434 || (!effective_config.code_blocks
435 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
436 || (!effective_config.tables && table_lines_set.contains(&line_number))
437 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
438 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
439 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
440 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
441 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_block)
442 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
443 || ctx.line_info(line_number).is_some_and(|info| info.in_pymdown_block)
444 {
445 continue;
446 }
447
448 // Check if this is a paragraph/regular text line
449 // If paragraphs = false, skip lines that are NOT in special blocks
450 // Blockquote content is treated as paragraph text, so it's not
451 // included in the special blocks list here.
452 if !effective_config.paragraphs {
453 let is_special_block = heading_lines_set.contains(&line_number)
454 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
455 || table_lines_set.contains(&line_number)
456 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
457 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
458 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
459 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
460 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_block)
461 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
462 || ctx
463 .line_info(line_number)
464 .is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container);
465
466 // Skip regular paragraph text when paragraphs = false
467 if !is_special_block {
468 continue;
469 }
470 }
471
472 // Skip blockquote lines when blockquotes = false.
473 // Also skip lazy continuation lines that belong to a blockquote
474 // (lines without `>` prefix that follow a blockquote line).
475 if !effective_config.blockquotes {
476 if ctx.lines[line_number - 1].blockquote.is_some() {
477 continue;
478 }
479 // Check for lazy continuation: scan backwards through
480 // non-blank lines to find if this paragraph started with
481 // a blockquote marker
482 if !line.trim().is_empty() {
483 let mut scan = line_number.saturating_sub(2);
484 loop {
485 if ctx.lines[scan].blockquote.is_some() {
486 // Found a blockquote ancestor — this is a lazy continuation
487 continue 'line_loop;
488 }
489 if lines[scan].trim().is_empty() || scan == 0 {
490 break;
491 }
492 scan -= 1;
493 }
494 }
495 }
496
497 // Skip lines that are only a URL, image ref, or link ref
498 if self.should_ignore_line(line, lines, line_idx, ctx) {
499 continue;
500 }
501 }
502
503 // In sentence-per-line mode, check if this is a single long sentence
504 // If so, emit a warning without a fix (user must manually rephrase)
505 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
506 let sentences = split_into_sentences(line.trim());
507 if sentences.len() == 1 {
508 // Single sentence that's too long - warn but don't auto-fix
509 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
510
511 let (start_line, start_col, end_line, end_col) =
512 calculate_excess_range(line_number, line, line_limit);
513
514 warnings.push(LintWarning {
515 rule_name: Some(self.name().to_string()),
516 message,
517 line: start_line,
518 column: start_col,
519 end_line,
520 end_column: end_col,
521 severity: Severity::Warning,
522 fix: None, // No auto-fix for long single sentences
523 });
524 continue;
525 }
526 // Multiple sentences will be handled by paragraph-based reflow
527 continue;
528 }
529
530 // In semantic-line-breaks mode, skip per-line checks —
531 // all reflow is handled at the paragraph level with cascading splits
532 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
533 continue;
534 }
535
536 // Don't provide fix for individual lines when reflow is enabled
537 // Paragraph-based fixes will be handled separately
538 let fix = None;
539
540 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
541
542 // Calculate precise character range for the excess portion
543 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
544
545 warnings.push(LintWarning {
546 rule_name: Some(self.name().to_string()),
547 message,
548 line: start_line,
549 column: start_col,
550 end_line,
551 end_column: end_col,
552 severity: Severity::Warning,
553 fix,
554 });
555 }
556
557 // If reflow is enabled, generate paragraph-based fixes
558 if effective_config.reflow {
559 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
560 // Merge paragraph warnings with line warnings, removing duplicates
561 for pw in paragraph_warnings {
562 // Remove any line warnings that overlap with this paragraph
563 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
564 warnings.push(pw);
565 }
566 }
567
568 Ok(warnings)
569 }
570
571 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
572 // For CLI usage, apply fixes from warnings
573 // LSP will use the warning-based fixes directly
574 let warnings = self.check(ctx)?;
575 let warnings =
576 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
577
578 // If there are no fixes, return content unchanged
579 if !warnings.iter().any(|w| w.fix.is_some()) {
580 return Ok(ctx.content.to_string());
581 }
582
583 // Apply warning-based fixes
584 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
585 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
586 }
587
588 fn as_any(&self) -> &dyn std::any::Any {
589 self
590 }
591
592 fn category(&self) -> RuleCategory {
593 RuleCategory::Whitespace
594 }
595
596 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
597 self.should_skip_with_config(ctx, &self.config)
598 }
599
600 fn default_config_section(&self) -> Option<(String, toml::Value)> {
601 let table = crate::rule_config_serde::config_schema_table(&MD013Config::default())?;
602 if table.is_empty() {
603 None
604 } else {
605 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
606 }
607 }
608
609 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
610 let mut aliases = std::collections::HashMap::new();
611 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
612 aliases.insert("strict_sentences".to_string(), "require-sentence-capital".to_string());
613 aliases.insert("strict-sentences".to_string(), "require-sentence-capital".to_string());
614 Some(aliases)
615 }
616
617 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
618 where
619 Self: Sized,
620 {
621 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
622 // Use global line_length if rule-specific config still has default value
623 if rule_config.line_length.get() == 80 {
624 rule_config.line_length = config.global.line_length;
625 }
626 Box::new(Self::from_config_struct(rule_config))
627 }
628}
629
630impl MD013LineLength {
631 fn is_blockquote_content_boundary(
632 &self,
633 content: &str,
634 line_num: usize,
635 ctx: &crate::lint_context::LintContext,
636 ) -> bool {
637 let trimmed = content.trim();
638
639 trimmed.is_empty()
640 || ctx.line_info(line_num).is_some_and(|info| {
641 info.in_code_block
642 || info.in_front_matter
643 || info.in_html_block
644 || info.in_html_comment
645 || info.in_esm_block
646 || info.in_jsx_expression
647 || info.in_jsx_block
648 || info.in_mdx_comment
649 || info.in_mkdocstrings
650 || info.in_pymdown_block
651 || info.in_mkdocs_container()
652 || info.is_div_marker
653 })
654 || trimmed.starts_with('#')
655 || trimmed.starts_with("```")
656 || trimmed.starts_with("~~~")
657 || trimmed.starts_with('>')
658 || TableUtils::is_potential_table_row(content)
659 || is_list_item(trimmed)
660 || is_horizontal_rule(content)
661 || (trimmed.starts_with('[') && content.contains("]:"))
662 || is_template_directive_only(content)
663 || is_standalone_attr_list(content)
664 || is_snippet_block_delimiter(content)
665 || is_github_alert_marker(trimmed)
666 || is_html_only_line(content)
667 }
668
669 fn generate_blockquote_paragraph_fix(
670 &self,
671 ctx: &crate::lint_context::LintContext,
672 config: &MD013Config,
673 lines: &[&str],
674 line_index: &LineIndex,
675 start_idx: usize,
676 line_ending: &str,
677 ) -> (Option<LintWarning>, usize) {
678 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
679 return (None, start_idx + 1);
680 };
681 let target_level = start_bq.nesting_level;
682
683 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
684 let mut i = start_idx;
685
686 while i < lines.len() {
687 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
688 break;
689 }
690
691 let line_num = i + 1;
692 if line_num > ctx.lines.len() {
693 break;
694 }
695
696 if lines[i].trim().is_empty() {
697 break;
698 }
699
700 let line_bq = ctx.lines[i].blockquote.as_deref();
701 if let Some(bq) = line_bq {
702 if bq.nesting_level != target_level {
703 break;
704 }
705
706 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
707 break;
708 }
709
710 collected.push(CollectedBlockquoteLine {
711 line_idx: i,
712 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
713 });
714 i += 1;
715 continue;
716 }
717
718 let lazy_content = lines[i].trim_start();
719 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
720 break;
721 }
722
723 collected.push(CollectedBlockquoteLine {
724 line_idx: i,
725 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
726 });
727 i += 1;
728 }
729
730 if collected.is_empty() {
731 return (None, start_idx + 1);
732 }
733
734 let next_idx = i;
735 let paragraph_start = collected[0].line_idx;
736 let end_line = collected[collected.len() - 1].line_idx;
737 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
738 let paragraph_text = line_data
739 .iter()
740 .map(|d| d.content.as_str())
741 .collect::<Vec<_>>()
742 .join(" ");
743
744 let contains_definition_list = line_data
745 .iter()
746 .any(|d| crate::utils::is_definition_list_item(&d.content));
747 if contains_definition_list {
748 return (None, next_idx);
749 }
750
751 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
752 if contains_snippets {
753 return (None, next_idx);
754 }
755
756 let needs_reflow = match config.reflow_mode {
757 ReflowMode::Normalize => {
758 self.normalize_mode_needs_reflow(line_data.iter().map(|d| d.content.as_str()), config)
759 }
760 ReflowMode::SentencePerLine => {
761 let sentences = split_into_sentences(¶graph_text);
762 sentences.len() > 1 || line_data.len() > 1
763 }
764 ReflowMode::SemanticLineBreaks => {
765 let sentences = split_into_sentences(¶graph_text);
766 sentences.len() > 1
767 || line_data.len() > 1
768 || collected
769 .iter()
770 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
771 }
772 ReflowMode::Default => collected
773 .iter()
774 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
775 };
776
777 if !needs_reflow {
778 return (None, next_idx);
779 }
780
781 let fallback_prefix = start_bq.prefix.clone();
782 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
783 let continuation_style = blockquote_continuation_style(&line_data);
784
785 let reflow_line_length = if config.line_length.is_unlimited() {
786 usize::MAX
787 } else {
788 config
789 .line_length
790 .get()
791 .saturating_sub(self.calculate_string_length(&explicit_prefix))
792 .max(1)
793 };
794
795 let reflow_options = crate::utils::text_reflow::ReflowOptions {
796 line_length: reflow_line_length,
797 break_on_sentences: true,
798 preserve_breaks: false,
799 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
800 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
801 abbreviations: config.abbreviations_for_reflow(),
802 length_mode: self.reflow_length_mode(),
803 attr_lists: ctx.flavor.supports_attr_lists(),
804 require_sentence_capital: config.require_sentence_capital,
805 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
806 Some(4)
807 } else {
808 None
809 },
810 };
811
812 let reflowed_with_style =
813 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
814
815 if reflowed_with_style.is_empty() {
816 return (None, next_idx);
817 }
818
819 let reflowed_text = reflowed_with_style.join(line_ending);
820
821 let start_range = line_index.whole_line_range(paragraph_start + 1);
822 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
823 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
824 } else {
825 line_index.whole_line_range(end_line + 1)
826 };
827 let byte_range = start_range.start..end_range.end;
828
829 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
830 format!("{reflowed_text}{line_ending}")
831 } else {
832 reflowed_text
833 };
834
835 let original_text = &ctx.content[byte_range.clone()];
836 if original_text == replacement {
837 return (None, next_idx);
838 }
839
840 let (warning_line, warning_end_line) = match config.reflow_mode {
841 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
842 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
843 ReflowMode::Default => {
844 let violating_line = collected
845 .iter()
846 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
847 .map_or(paragraph_start + 1, |line| line.line_idx + 1);
848 (violating_line, violating_line)
849 }
850 };
851
852 let warning = LintWarning {
853 rule_name: Some(self.name().to_string()),
854 message: match config.reflow_mode {
855 ReflowMode::Normalize => format!(
856 "Paragraph could be normalized to use line length of {} characters",
857 config.line_length.get()
858 ),
859 ReflowMode::SentencePerLine => {
860 let num_sentences = split_into_sentences(¶graph_text).len();
861 if line_data.len() == 1 {
862 format!("Line contains {num_sentences} sentences (one sentence per line required)")
863 } else {
864 let num_lines = line_data.len();
865 format!(
866 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
867 )
868 }
869 }
870 ReflowMode::SemanticLineBreaks => {
871 let num_sentences = split_into_sentences(¶graph_text).len();
872 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
873 }
874 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
875 },
876 line: warning_line,
877 column: 1,
878 end_line: warning_end_line,
879 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
880 severity: Severity::Warning,
881 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
882 };
883
884 (Some(warning), next_idx)
885 }
886
887 /// Generate paragraph-based fixes
888 fn generate_paragraph_fixes(
889 &self,
890 ctx: &crate::lint_context::LintContext,
891 config: &MD013Config,
892 lines: &[&str],
893 ) -> Vec<LintWarning> {
894 let mut warnings = Vec::new();
895 let line_index = LineIndex::new(ctx.content);
896
897 // Detect the content's line ending style to preserve it in replacements.
898 // The LSP receives content from editors which may use CRLF (Windows).
899 // Replacements must match the original line endings to avoid false positives.
900 let line_ending = crate::utils::line_ending::detect_line_ending(ctx.content);
901
902 let mut i = 0;
903 while i < lines.len() {
904 let line_num = i + 1;
905
906 // Handle blockquote paragraphs with style-preserving reflow.
907 // Skip blockquotes when blockquotes=false or paragraphs=false
908 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
909 if !config.blockquotes || !config.paragraphs {
910 // Skip past all blockquote lines (explicit and lazy continuations).
911 // A lazy continuation is a non-blank line without `>` that follows
912 // a blockquote line and isn't a structural element.
913 let mut saw_explicit_bq = false;
914 while i < lines.len() && i < ctx.lines.len() {
915 if ctx.lines[i].blockquote.is_some() {
916 saw_explicit_bq = true;
917 i += 1;
918 } else if saw_explicit_bq
919 && !lines[i].trim().is_empty()
920 && !lines[i].trim_start().starts_with('#')
921 && !lines[i].trim_start().starts_with('>')
922 {
923 // Lazy continuation of preceding blockquote
924 i += 1;
925 } else {
926 break;
927 }
928 }
929 continue;
930 }
931 let (warning, next_idx) =
932 self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i, line_ending);
933 if let Some(warning) = warning {
934 warnings.push(warning);
935 }
936 i = next_idx;
937 continue;
938 }
939
940 // Skip special structures (but NOT MkDocs containers - those get special handling)
941 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
942 info.in_code_block
943 || info.in_front_matter
944 || info.in_html_block
945 || info.in_html_comment
946 || info.in_esm_block
947 || info.in_jsx_expression
948 || info.in_jsx_block
949 || info.in_mdx_comment
950 || info.in_mkdocstrings
951 || info.in_pymdown_block
952 });
953
954 // Skip link reference definitions but NOT footnote definitions.
955 // Footnote definitions (`[^id]: prose`) contain reflowable text,
956 // while link reference definitions (`[ref]: URL`) contain URLs
957 // that cannot be shortened.
958 let is_link_ref_def =
959 lines[i].trim().starts_with('[') && !lines[i].trim().starts_with("[^") && lines[i].contains("]:");
960
961 if should_skip_due_to_line_info
962 || lines[i].trim().starts_with('#')
963 || TableUtils::is_potential_table_row(lines[i])
964 || lines[i].trim().is_empty()
965 || is_horizontal_rule(lines[i])
966 || is_template_directive_only(lines[i])
967 || is_link_ref_def
968 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
969 || is_html_only_line(lines[i])
970 {
971 i += 1;
972 continue;
973 }
974
975 // Handle footnote definitions: `[^id]: prose text that can be reflowed`
976 // Supports multi-paragraph footnotes with code blocks, blockquotes,
977 // tables, and lists preserved verbatim.
978 // Validate structure: must start with `[^`, contain `]:`, and the ID
979 // must not contain `[` or `]` (prevents false matches on nested brackets)
980 if lines[i].trim().starts_with("[^") && lines[i].contains("]:") && {
981 let after_caret = &lines[i].trim()[2..];
982 after_caret
983 .find("]:")
984 .is_some_and(|pos| pos > 0 && !after_caret[..pos].contains(['[', ']']))
985 } {
986 let footnote_start = i;
987 let line = lines[i];
988
989 // Extract the prefix `[^id]:`
990 let Some(colon_pos) = line.find("]:") else {
991 i += 1;
992 continue;
993 };
994 let prefix_end = colon_pos + 2;
995 let prefix = &line[..prefix_end];
996
997 // Content starts after `]: ` (with optional space)
998 let content_start = if line[prefix_end..].starts_with(' ') {
999 prefix_end + 1
1000 } else {
1001 prefix_end
1002 };
1003 let first_content = &line[content_start..];
1004
// Footnotes are a Markdown extension (not part of core CommonMark);
// continuation lines of a footnote definition use a 4-space indent
1006 const FN_INDENT: usize = 4;
1007
// --- Line classification for footnote content ---
/// How one line of a footnote definition is treated when the footnote is
/// rebuilt.
#[derive(Debug, Clone)]
enum FnLineType {
    /// Prose text; consecutive `Content` lines are grouped into one
    /// paragraph, joined and reflowed.
    Content(String),
    /// Line kept as-is: the text with the footnote continuation indent
    /// stripped, plus the visual indent the line had in the source.
    Verbatim(String, usize), // preserved text, original indent
    /// Blank line inside the footnote; ends the paragraph or verbatim run
    /// collected so far.
    Empty,
}
1015
1016 // Helper: compute visual indent (tabs = 4 spaces)
1017 let visual_indent = |s: &str| -> usize {
1018 s.chars()
1019 .take_while(|c| c.is_whitespace())
1020 .map(|c| if c == '\t' { 4 } else { 1 })
1021 .sum::<usize>()
1022 };
1023
/// Helper: report whether a line is a fenced-code-block marker.
///
/// After trimming, the line must begin with a run of at least three
/// identical fence characters (all backticks or all tildes). For a backtick
/// fence the rest of the line (the info string) must not contain a backtick:
/// per CommonMark such a line is an inline code span (for example
/// "```code``` more text"), not a fence. Tilde fences have no such
/// restriction.
fn is_fence(s: &str) -> bool {
    let t = s.trim();
    let Some(fence_char) = t.chars().next().filter(|c| matches!(c, '`' | '~')) else {
        return false;
    };
    let run_len = t.chars().take_while(|&c| c == fence_char).count();
    if run_len < 3 {
        return false;
    }
    // Both fence characters are ASCII, so the run length is also a valid
    // byte offset into `t`.
    fence_char == '~' || !t[run_len..].contains('`')
}
1031
1032 // Helper: check if a trimmed line is a setext underline
1033 let is_setext_underline = |s: &str| -> bool {
1034 let t = s.trim();
1035 !t.is_empty()
1036 && (t.chars().all(|c| c == '=' || c == ' ') || t.chars().all(|c| c == '-' || c == ' '))
1037 && t.contains(['=', '-'])
1038 };
1039
1040 // Deferred body: `[^id]:\n content` — first line has no content,
1041 // actual content starts on the next indented line
1042 let deferred_body = first_content.trim().is_empty();
1043
1044 // Collect all lines belonging to this footnote definition
1045 let mut fn_lines: Vec<FnLineType> = Vec::new();
1046 if !deferred_body {
1047 fn_lines.push(FnLineType::Content(first_content.to_string()));
1048 }
1049 let mut last_consumed = i;
1050 i += 1;
1051
1052 // Strip only the footnote continuation indent, preserving
1053 // internal indentation (e.g., code block body indent)
1054 let strip_fn_indent = |s: &str| -> String {
1055 let mut chars = s.chars();
1056 let mut stripped = 0;
1057 while stripped < FN_INDENT {
1058 match chars.next() {
1059 Some('\t') => stripped += 4,
1060 Some(c) if c.is_whitespace() => stripped += 1,
1061 _ => break,
1062 }
1063 }
1064 chars.as_str().to_string()
1065 };
1066
1067 let mut in_fenced_code = false;
1068 let mut consecutive_blanks = 0u32;
1069
1070 while i < lines.len() {
1071 let next = lines[i];
1072 let next_trimmed = next.trim();
1073
1074 // Blank line handling
1075 if next_trimmed.is_empty() {
1076 consecutive_blanks += 1;
1077 // 2+ consecutive blanks terminate the footnote
1078 if consecutive_blanks >= 2 {
1079 break;
1080 }
1081
1082 // Inside a fenced code block, blank lines are part of the code
1083 if in_fenced_code {
1084 consecutive_blanks = 0; // Don't count blanks inside code blocks
1085 fn_lines.push(FnLineType::Verbatim(String::new(), 0));
1086 last_consumed = i;
1087 i += 1;
1088 continue;
1089 }
1090
1091 // Peek ahead: if next non-blank line is indented >= FN_INDENT,
1092 // this blank is an internal paragraph separator
1093 if i + 1 < lines.len() {
1094 let peek = lines[i + 1];
1095 let peek_indent = visual_indent(peek);
1096 if !peek.trim().is_empty() && peek_indent >= FN_INDENT {
1097 fn_lines.push(FnLineType::Empty);
1098 last_consumed = i;
1099 i += 1;
1100 continue;
1101 }
1102 }
1103 // No valid continuation after blank — end of footnote
1104 break;
1105 }
1106
1107 consecutive_blanks = 0;
1108 let indent = visual_indent(next);
1109
1110 // Not indented enough — end of footnote
1111 if indent < FN_INDENT {
1112 break;
1113 }
1114
1115 // Inside a fenced code block: everything is verbatim until closing fence
1116 if in_fenced_code {
1117 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1118 if is_fence(next_trimmed) {
1119 in_fenced_code = false;
1120 }
1121 last_consumed = i;
1122 i += 1;
1123 continue;
1124 }
1125
1126 // Fence opener — start verbatim code block
1127 if is_fence(next_trimmed) {
1128 in_fenced_code = true;
1129 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1130 last_consumed = i;
1131 i += 1;
1132 continue;
1133 }
1134
1135 // Indented code block: indent >= FN_INDENT + 4 (= 8 spaces)
1136 if indent >= FN_INDENT + 4 {
1137 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1138 last_consumed = i;
1139 i += 1;
1140 continue;
1141 }
1142
1143 // Structural content that must be preserved verbatim
1144 if next_trimmed.starts_with('#')
1145 || is_list_item(next_trimmed)
1146 || next_trimmed.starts_with('>')
1147 || TableUtils::is_potential_table_row(next_trimmed)
1148 || is_setext_underline(next_trimmed)
1149 || is_horizontal_rule(next_trimmed)
1150 || crate::utils::mkdocs_footnotes::is_footnote_definition(next_trimmed)
1151 {
1152 // Preserve verbatim: blockquotes, tables, lists, setext
1153 // underlines, and horizontal rules inside the footnote
1154 if next_trimmed.starts_with('>')
1155 || TableUtils::is_potential_table_row(next_trimmed)
1156 || is_list_item(next_trimmed)
1157 || is_setext_underline(next_trimmed)
1158 || is_horizontal_rule(next_trimmed)
1159 {
1160 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1161 last_consumed = i;
1162 i += 1;
1163 continue;
1164 }
// Headings and new footnote definitions end the footnote
1166 break;
1167 }
1168
1169 // Link reference definitions inside footnotes are not reflowable
1170 if next_trimmed.starts_with('[')
1171 && !next_trimmed.starts_with("[^")
1172 && next_trimmed.contains("]:")
1173 && LINK_REF_PATTERN.is_match(next_trimmed)
1174 {
1175 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1176 last_consumed = i;
1177 i += 1;
1178 continue;
1179 }
1180
1181 // HTML-only lines inside footnotes are not reflowable
1182 if is_html_only_line(next_trimmed) {
1183 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1184 last_consumed = i;
1185 i += 1;
1186 continue;
1187 }
1188
1189 // Regular prose content
1190 fn_lines.push(FnLineType::Content(next_trimmed.to_string()));
1191 last_consumed = i;
1192 i += 1;
1193 }
1194
1195 // Nothing collected or only empty lines
1196 if fn_lines.iter().all(|l| matches!(l, FnLineType::Empty)) || fn_lines.is_empty() {
1197 continue;
1198 }
1199
// --- Group into blocks ---
/// A run of consecutive footnote lines of the same kind. Blocks are written
/// back with a blank line between them.
#[derive(Debug)]
enum FnBlock {
    /// Consecutive prose lines; joined with single spaces and reflowed.
    Paragraph(Vec<String>),
    /// Consecutive verbatim lines; each is re-emitted unchanged behind the
    /// footnote indent. The stored indent is not consulted when the block
    /// is written back.
    Verbatim(Vec<(String, usize)>), // (content, indent) preserved as-is
}
1206
1207 let mut blocks: Vec<FnBlock> = Vec::new();
1208 let mut current_para: Vec<String> = Vec::new();
1209 let mut current_verbatim: Vec<(String, usize)> = Vec::new();
1210
1211 for fl in &fn_lines {
1212 match fl {
1213 FnLineType::Content(s) => {
1214 if !current_verbatim.is_empty() {
1215 blocks.push(FnBlock::Verbatim(std::mem::take(&mut current_verbatim)));
1216 }
1217 current_para.push(s.clone());
1218 }
1219 FnLineType::Verbatim(s, indent) => {
1220 if !current_para.is_empty() {
1221 blocks.push(FnBlock::Paragraph(std::mem::take(&mut current_para)));
1222 }
1223 current_verbatim.push((s.clone(), *indent));
1224 }
1225 FnLineType::Empty => {
1226 if !current_para.is_empty() {
1227 blocks.push(FnBlock::Paragraph(std::mem::take(&mut current_para)));
1228 }
1229 if !current_verbatim.is_empty() {
1230 blocks.push(FnBlock::Verbatim(std::mem::take(&mut current_verbatim)));
1231 }
1232 }
1233 }
1234 }
1235 if !current_para.is_empty() {
1236 blocks.push(FnBlock::Paragraph(current_para));
1237 }
1238 if !current_verbatim.is_empty() {
1239 blocks.push(FnBlock::Verbatim(current_verbatim));
1240 }
1241
1242 // --- Reflow paragraphs and reconstruct ---
1243 let prefix_display_width = prefix.chars().count() + 1; // +1 for space
1244 let reflow_line_length = if config.line_length.is_unlimited() {
1245 usize::MAX
1246 } else {
1247 config
1248 .line_length
1249 .get()
1250 .saturating_sub(FN_INDENT.max(prefix_display_width))
1251 .max(20)
1252 };
1253 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1254 line_length: reflow_line_length,
1255 break_on_sentences: true,
1256 preserve_breaks: false,
1257 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1258 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1259 abbreviations: config.abbreviations_for_reflow(),
1260 length_mode: self.reflow_length_mode(),
1261 attr_lists: ctx.flavor.supports_attr_lists(),
1262 require_sentence_capital: config.require_sentence_capital,
1263 max_list_continuation_indent: None,
1264 };
1265
1266 let indent_str = " ".repeat(FN_INDENT);
1267 let mut result_lines: Vec<String> = Vec::new();
1268 let mut is_first_block = true;
1269
1270 for block in &blocks {
1271 match block {
1272 FnBlock::Paragraph(para_lines) => {
1273 let paragraph_text = para_lines.join(" ");
1274 let paragraph_text = paragraph_text.trim();
1275 if paragraph_text.is_empty() {
1276 continue;
1277 }
1278
1279 let reflowed = crate::utils::text_reflow::reflow_line(paragraph_text, &reflow_options);
1280 if reflowed.is_empty() {
1281 continue;
1282 }
1283
1284 // Blank line separator between blocks
1285 if !result_lines.is_empty() {
1286 result_lines.push(String::new());
1287 }
1288
1289 for (idx, rline) in reflowed.iter().enumerate() {
1290 if is_first_block && idx == 0 {
1291 result_lines.push(format!("{prefix} {rline}"));
1292 } else {
1293 result_lines.push(format!("{indent_str}{rline}"));
1294 }
1295 }
1296 is_first_block = false;
1297 }
1298 FnBlock::Verbatim(verb_lines) => {
1299 // Blank line separator between blocks
1300 if !result_lines.is_empty() {
1301 result_lines.push(String::new());
1302 }
1303
1304 if is_first_block {
1305 // Verbatim as first block in a deferred-body footnote
1306 if deferred_body {
1307 result_lines.push(prefix.to_string());
1308 }
1309 is_first_block = false;
1310 }
1311 for (content, _orig_indent) in verb_lines {
1312 result_lines.push(format!("{indent_str}{content}"));
1313 }
1314 }
1315 }
1316 }
1317
1318 // If nothing was produced, skip
1319 if result_lines.is_empty() {
1320 continue;
1321 }
1322
1323 let reflowed_text = result_lines.join(line_ending);
1324
1325 // Calculate byte range using last_consumed
1326 let start_range = line_index.whole_line_range(footnote_start + 1);
1327 let end_range = if last_consumed == lines.len() - 1 && !ctx.content.ends_with('\n') {
1328 line_index.line_text_range(last_consumed + 1, 1, lines[last_consumed].len() + 1)
1329 } else {
1330 line_index.whole_line_range(last_consumed + 1)
1331 };
1332 let byte_range = start_range.start..end_range.end;
1333
1334 let replacement = if last_consumed < lines.len() - 1 || ctx.content.ends_with('\n') {
1335 format!("{reflowed_text}{line_ending}")
1336 } else {
1337 reflowed_text
1338 };
1339
1340 let original_text = &ctx.content[byte_range.clone()];
1341 let max_length = (footnote_start..=last_consumed)
1342 .map(|idx| self.calculate_effective_length(lines[idx]))
1343 .max()
1344 .unwrap_or(0);
1345 let line_limit = if config.line_length.is_unlimited() {
1346 usize::MAX
1347 } else {
1348 config.line_length.get()
1349 };
1350 if original_text != replacement && max_length > line_limit {
1351 warnings.push(LintWarning {
1352 rule_name: Some(self.name().to_string()),
1353 message: format!(
1354 "Line length {} exceeds {} characters",
1355 max_length,
1356 config.line_length.get()
1357 ),
1358 line: footnote_start + 1,
1359 column: 1,
1360 end_line: last_consumed + 1,
1361 end_column: lines[last_consumed].len() + 1,
1362 severity: Severity::Warning,
1363 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
1364 });
1365 }
1366 continue;
1367 }
1368
1369 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
1370 if ctx
1371 .line_info(line_num)
1372 .is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container)
1373 {
1374 // Skip admonition/tab marker lines — only reflow their indented content
1375 let current_line = lines[i];
1376 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
1377 i += 1;
1378 continue;
1379 }
1380
1381 let container_start = i;
1382
1383 // Detect the actual indent level from the first content line
1384 // (supports nested admonitions with 8+ spaces)
1385 let first_line = lines[i];
1386 let base_indent_len = first_line.len() - first_line.trim_start().len();
1387 let base_indent: String = " ".repeat(base_indent_len);
1388
1389 // Collect consecutive MkDocs container paragraph lines
1390 let mut container_lines: Vec<&str> = Vec::new();
1391 while i < lines.len() {
1392 let current_line_num = i + 1;
1393 let line_info = ctx.line_info(current_line_num);
1394
1395 // Stop if we leave the MkDocs container
1396 if !line_info.is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container) {
1397 break;
1398 }
1399
1400 let line = lines[i];
1401
1402 // Stop at paragraph boundaries within the container
1403 if line.trim().is_empty() {
1404 break;
1405 }
1406
1407 // Skip list items, code blocks, headings, HTML-only lines within containers
1408 if is_list_item(line.trim())
1409 || line.trim().starts_with("```")
1410 || line.trim().starts_with("~~~")
1411 || line.trim().starts_with('#')
1412 || is_html_only_line(line)
1413 {
1414 break;
1415 }
1416
1417 container_lines.push(line);
1418 i += 1;
1419 }
1420
1421 if container_lines.is_empty() {
1422 // Must advance i to avoid infinite loop when we encounter
1423 // non-paragraph content (code block, list, heading, empty line)
1424 // at the start of an MkDocs container
1425 i += 1;
1426 continue;
1427 }
1428
1429 // Strip the base indent from each line and join for reflow
1430 let stripped_lines: Vec<&str> = container_lines
1431 .iter()
1432 .map(|line| {
1433 if line.starts_with(&base_indent) {
1434 &line[base_indent_len..]
1435 } else {
1436 line.trim_start()
1437 }
1438 })
1439 .collect();
1440 let paragraph_text = stripped_lines.join(" ");
1441
1442 // Check if reflow is needed
1443 let needs_reflow = match config.reflow_mode {
1444 ReflowMode::Normalize => self.normalize_mode_needs_reflow(container_lines.iter().copied(), config),
1445 ReflowMode::SentencePerLine => {
1446 let sentences = split_into_sentences(¶graph_text);
1447 sentences.len() > 1 || container_lines.len() > 1
1448 }
1449 ReflowMode::SemanticLineBreaks => {
1450 let sentences = split_into_sentences(¶graph_text);
1451 sentences.len() > 1
1452 || container_lines.len() > 1
1453 || container_lines
1454 .iter()
1455 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1456 }
1457 ReflowMode::Default => container_lines
1458 .iter()
1459 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
1460 };
1461
1462 if !needs_reflow {
1463 continue;
1464 }
1465
1466 // Calculate byte range for this container paragraph
1467 let start_range = line_index.whole_line_range(container_start + 1);
1468 let end_line = container_start + container_lines.len() - 1;
1469 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1470 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1471 } else {
1472 line_index.whole_line_range(end_line + 1)
1473 };
1474 let byte_range = start_range.start..end_range.end;
1475
// Reflow with adjusted line length (accounting for the detected base indent)
1477 let reflow_line_length = if config.line_length.is_unlimited() {
1478 usize::MAX
1479 } else {
1480 config.line_length.get().saturating_sub(base_indent_len).max(1)
1481 };
1482 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1483 line_length: reflow_line_length,
1484 break_on_sentences: true,
1485 preserve_breaks: false,
1486 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1487 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1488 abbreviations: config.abbreviations_for_reflow(),
1489 length_mode: self.reflow_length_mode(),
1490 attr_lists: ctx.flavor.supports_attr_lists(),
1491 require_sentence_capital: config.require_sentence_capital,
1492 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
1493 Some(4)
1494 } else {
1495 None
1496 },
1497 };
1498 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1499
// Re-add the detected base indent to each reflowed line
1501 let reflowed_with_indent: Vec<String> =
1502 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
1503 let reflowed_text = reflowed_with_indent.join(line_ending);
1504
1505 // Preserve trailing newline
1506 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1507 format!("{reflowed_text}{line_ending}")
1508 } else {
1509 reflowed_text
1510 };
1511
1512 // Only generate a warning if the replacement is different
1513 let original_text = &ctx.content[byte_range.clone()];
1514 if original_text != replacement {
1515 warnings.push(LintWarning {
1516 rule_name: Some(self.name().to_string()),
1517 message: format!(
1518 "Line length {} exceeds {} characters (in MkDocs container)",
1519 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
1520 config.line_length.get()
1521 ),
1522 line: container_start + 1,
1523 column: 1,
1524 end_line: end_line + 1,
1525 end_column: lines[end_line].len() + 1,
1526 severity: Severity::Warning,
1527 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
1528 });
1529 }
1530 continue;
1531 }
1532
1533 // Helper function to detect semantic line markers
1534 let is_semantic_line = |content: &str| -> bool {
1535 let trimmed = content.trim_start();
1536 let semantic_markers = [
1537 "NOTE:",
1538 "WARNING:",
1539 "IMPORTANT:",
1540 "CAUTION:",
1541 "TIP:",
1542 "DANGER:",
1543 "HINT:",
1544 "INFO:",
1545 ];
1546 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
1547 };
1548
1549 // Helper function to detect fence markers (opening or closing)
1550 let is_fence_marker = |content: &str| -> bool {
1551 let trimmed = content.trim_start();
1552 trimmed.starts_with("```") || trimmed.starts_with("~~~")
1553 };
1554
1555 // Check if this is a list item - handle it specially
1556 let trimmed = lines[i].trim();
1557 if is_list_item(trimmed) {
1558 // Collect the entire list item including continuation lines
1559 let list_start = i;
1560 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
1561 let marker_len = marker.len();
1562
1563 // Checkbox ([ ]/[x]/[X]) is inline content, not part of the list marker.
1564 // Use the base bullet/number marker width for continuation recognition
1565 // so that continuation lines at 2+ spaces are collected for "- [ ] " items.
1566 let base_marker_len = if marker.contains("[ ] ") || marker.contains("[x] ") || marker.contains("[X] ") {
1567 marker.find('[').unwrap_or(marker_len)
1568 } else {
1569 marker_len
1570 };
1571
1572 // MkDocs flavor requires at least 4 spaces for list continuation
1573 // after a blank line (multi-paragraph list items). For non-blank
1574 // continuation (lines directly following the marker line), use
1575 // the natural marker width so that 2-space indent is recognized.
1576 let item_indent = ctx.lines[i].indent;
1577 let min_continuation_indent = if ctx.flavor.requires_strict_list_indent() {
1578 // Use 4-space relative indent from the list item's nesting level
1579 item_indent + (base_marker_len - item_indent).max(4)
1580 } else {
1581 marker_len
1582 };
1583 let content_continuation_indent = base_marker_len;
1584
// Classification of each collected line of the list item (the marker line
// plus its continuation lines). Sibling and nested list items are not
// collected; they end the collection loop.
#[derive(Clone)]
enum LineType {
    /// Paragraph text: the marker line's content, or a continuation line
    /// with its leading indent removed.
    Content(String),
    /// Code line: text after the indent, and the original indent. Used for
    /// lines flagged `in_code_block`, for fence marker lines, and for lines
    /// indented more than `min_continuation_indent + 3`.
    CodeBlock(String, usize),
    /// Line starting with NOTE:, WARNING:, etc. that should stay separate.
    SemanticLine(String),
    /// MkDocs Snippets delimiter (-8<-) that must stay on its own line.
    SnippetLine(String),
    /// Quarto/Pandoc div marker (::: opening or closing).
    DivMarker(String),
    /// Admonition header text (e.g. "!!! note") and original indent.
    AdmonitionHeader(String, usize),
    /// Admonition body text and original indent.
    AdmonitionContent(String, usize),
    /// GFM table row, preserved verbatim with its original indent.
    Table(String, usize),
    /// Blank line that is followed by a line indented enough to continue
    /// the list item.
    Empty,
}
1598
1599 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1600 i += 1;
1601
1602 // Collect continuation lines using ctx.lines for metadata
1603 while i < lines.len() {
1604 let line_info = &ctx.lines[i];
1605
1606 // Use pre-computed is_blank from ctx
1607 if line_info.is_blank {
1608 // Empty line - check if next line is indented (part of list item)
1609 if i + 1 < lines.len() {
1610 let next_info = &ctx.lines[i + 1];
1611
1612 // Check if next line is indented enough to be continuation
1613 if !next_info.is_blank && next_info.indent >= min_continuation_indent {
1614 // This blank line is between paragraphs/blocks in the list item
1615 list_item_lines.push(LineType::Empty);
1616 i += 1;
1617 continue;
1618 }
1619 }
1620 // No indented line after blank, end of list item
1621 break;
1622 }
1623
1624 // Use pre-computed indent from ctx
1625 let indent = line_info.indent;
1626
1627 // Valid continuation must be indented at least content_continuation_indent.
1628 // For non-blank continuation, use marker_len (e.g. 2 for "- ").
1629 // MkDocs strict 4-space requirement applies only after blank lines.
1630 if indent >= content_continuation_indent {
1631 let trimmed = line_info.content(ctx.content).trim();
1632
1633 // Check for MkDocs admonition lines inside list items BEFORE
1634 // checking in_code_block. Lines inside code blocks within
1635 // admonitions have both in_admonition and in_code_block set;
1636 // admonition membership takes priority so the entire admonition
1637 // structure (including embedded code blocks) is preserved.
1638 if line_info.in_admonition {
1639 let raw_content = line_info.content(ctx.content);
1640 if mkdocs_admonitions::is_admonition_start(raw_content) {
1641 let header_text = raw_content[indent..].trim_end().to_string();
1642 list_item_lines.push(LineType::AdmonitionHeader(header_text, indent));
1643 } else {
1644 let body_text = raw_content[indent..].trim_end().to_string();
1645 list_item_lines.push(LineType::AdmonitionContent(body_text, indent));
1646 }
1647 i += 1;
1648 continue;
1649 }
1650
1651 // Use pre-computed in_code_block from ctx
1652 if line_info.in_code_block {
1653 list_item_lines.push(LineType::CodeBlock(
1654 line_info.content(ctx.content)[indent..].to_string(),
1655 indent,
1656 ));
1657 i += 1;
1658 continue;
1659 }
1660
1661 // Check if this is a SIBLING list item (breaks parent)
1662 // Nested lists are indented >= marker_len and are PART of the parent item
1663 // Siblings are at indent < marker_len (at or before parent marker)
1664 if is_list_item(trimmed) && indent < marker_len {
1665 // This is a sibling item at same or higher level - end parent item
1666 break;
1667 }
1668
1669 // Nested list items are always processed independently
1670 // by the outer loop, so break when we encounter one.
1671 // If a blank line was collected before this, uncollect it
1672 // so the outer loop preserves the blank between parent and nested.
1673 if is_list_item(trimmed) && indent >= marker_len {
1674 if matches!(list_item_lines.last(), Some(LineType::Empty)) {
1675 list_item_lines.pop();
1676 i -= 1;
1677 }
1678 break;
1679 }
1680
1681 // Normal continuation vs indented code block.
1682 // Use min_continuation_indent for the threshold since
1683 // code blocks start 4 spaces beyond the expected content
1684 // level (which is min_continuation_indent for MkDocs).
1685 if indent <= min_continuation_indent + 3 {
1686 // Extract content (remove indentation and trailing whitespace)
1687 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1688 // See: https://github.com/rvben/rumdl/issues/76
1689 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1690
1691 // Check if this is a div marker (::: opening or closing)
1692 // These must be preserved on their own line, not merged into paragraphs
1693 if line_info.is_div_marker {
1694 list_item_lines.push(LineType::DivMarker(content));
1695 }
1696 // Check if this is a fence marker (opening or closing)
1697 // These should be treated as code block lines, not paragraph content
1698 else if is_fence_marker(&content) {
1699 list_item_lines.push(LineType::CodeBlock(content, indent));
1700 }
1701 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1702 else if is_semantic_line(&content) {
1703 list_item_lines.push(LineType::SemanticLine(content));
1704 }
1705 // Check if this is a snippet block delimiter (-8<- or --8<--)
1706 // These must be preserved on their own lines for MkDocs Snippets extension
1707 else if is_snippet_block_delimiter(&content) {
1708 list_item_lines.push(LineType::SnippetLine(content));
1709 }
1710 // Check if this is a GFM table row. Tables nested inside list
1711 // items must be preserved verbatim — joining them with prose
1712 // breaks the column structure.
1713 //
1714 // `is_potential_table_row` is intentionally permissive at the
1715 // row level: any line with `|` and 2+ cells qualifies. To avoid
1716 // misclassifying prose continuation lines that contain a literal
1717 // pipe (e.g. "use grep | sort to ..."), require one of:
1718 // - the row is pipe-bordered (`| ... |`), the canonical form
1719 // for tables nested in lists; or
1720 // - the next line is a delimiter row (this is a header); or
1721 // - the previous classified line was already a Table (this is
1722 // a continuation row).
1723 else if TableUtils::is_potential_table_row(&content) && {
1724 let pipe_bordered = content.trim().starts_with('|') && content.trim().ends_with('|');
1725 let next_is_delim = ctx
1726 .lines
1727 .get(i + 1)
1728 .is_some_and(|next| TableUtils::is_delimiter_row(next.content(ctx.content)));
1729 let prev_was_table = matches!(list_item_lines.last(), Some(LineType::Table(..)));
1730 pipe_bordered || next_is_delim || prev_was_table
1731 } {
1732 list_item_lines.push(LineType::Table(content, indent));
1733 } else {
1734 list_item_lines.push(LineType::Content(content));
1735 }
1736 i += 1;
1737 } else {
1738 // indent >= min_continuation_indent + 4: indented code block
1739 list_item_lines.push(LineType::CodeBlock(
1740 line_info.content(ctx.content)[indent..].to_string(),
1741 indent,
1742 ));
1743 i += 1;
1744 }
1745 } else {
1746 // Not indented enough, end of list item
1747 break;
1748 }
1749 }
1750
1751 // Determine the output continuation indent.
1752 // Normalize/Default modes canonicalize to min_continuation_indent
1753 // (fixing over-indented continuation). Semantic/SentencePerLine
1754 // modes preserve the user's actual indent since they only fix
1755 // line breaking, not indentation.
1756 let indent_size = match config.reflow_mode {
1757 ReflowMode::SemanticLineBreaks | ReflowMode::SentencePerLine => {
1758 // Find indent of the first plain text continuation line,
1759 // skipping the marker line (index 0), nested list items,
1760 // code blocks, and blank lines.
1761 list_item_lines
1762 .iter()
1763 .enumerate()
1764 .skip(1)
1765 .find_map(|(k, lt)| {
1766 if matches!(lt, LineType::Content(_)) {
1767 Some(ctx.lines[list_start + k].indent)
1768 } else {
1769 None
1770 }
1771 })
1772 .unwrap_or(min_continuation_indent)
1773 }
1774 _ => min_continuation_indent,
1775 };
1776 // For checkbox items in mkdocs flavor, enforce minimum indent so
1777 // continuation lines use the structural list indent (4), not the
1778 // content-aligned indent (6) which Python-Markdown doesn't support
1779 let has_checkbox = base_marker_len < marker_len;
1780 let indent_size = if has_checkbox && ctx.flavor.requires_strict_list_indent() {
1781 indent_size.max(min_continuation_indent)
1782 } else {
1783 indent_size
1784 };
1785 let expected_indent = " ".repeat(indent_size);
1786
1787 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
1788 let mut builder = BlockBuilder::new();
1789 for line in &list_item_lines {
1790 match line {
1791 LineType::Empty => builder.feed_blank_line(),
1792 LineType::Content(content) => builder.feed_content(content),
1793 LineType::CodeBlock(content, indent) => builder.feed_code_line(content, *indent),
1794 LineType::SemanticLine(content) => builder.feed_semantic_line(content),
1795 LineType::SnippetLine(content) => builder.feed_snippet_line(content),
1796 LineType::DivMarker(content) => builder.feed_div_marker(content),
1797 LineType::AdmonitionHeader(header_text, indent) => {
1798 builder.feed_admonition_header(header_text, *indent)
1799 }
1800 LineType::AdmonitionContent(content, indent) => {
1801 builder.feed_admonition_content(content, *indent)
1802 }
1803 LineType::Table(content, indent) => builder.feed_table_line(content, *indent),
1804 }
1805 }
1806 let blocks = builder.finalize();
1807
1808 // Helper: check if a line (raw source or stripped content) is exempt
1809 // from line-length checks. Link reference definitions are always exempt;
1810 // standalone link/image lines are exempt when strict mode is off.
1811 // Also checks content after stripping list markers, since list item
1812 // continuation lines may contain link ref defs.
1813 let is_exempt_line = |raw_line: &str| -> bool {
1814 let trimmed = raw_line.trim();
1815 // Link reference definitions: always exempt
1816 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
1817 return true;
1818 }
1819 // Also check after stripping list markers (for list item content)
1820 if is_list_item(trimmed) {
1821 let (_, content) = extract_list_marker_and_content(trimmed);
1822 let content_trimmed = content.trim();
1823 if content_trimmed.starts_with('[')
1824 && content_trimmed.contains("]:")
1825 && LINK_REF_PATTERN.is_match(content_trimmed)
1826 {
1827 return true;
1828 }
1829 }
1830 // Standalone link/image lines: exempt when not strict
1831 if !config.strict && is_standalone_link_or_image_line(raw_line) {
1832 return true;
1833 }
1834 // HTML-only lines: exempt when not strict
1835 if !config.strict && is_html_only_line(raw_line) {
1836 return true;
1837 }
1838 false
1839 };
1840
1841 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1842 // Exclude link reference definitions and standalone link lines from content
1843 // so they don't pollute combined_content or trigger false reflow.
1844 let content_lines: Vec<String> = list_item_lines
1845 .iter()
1846 .filter_map(|line| {
1847 if let LineType::Content(s) = line {
1848 if is_exempt_line(s) {
1849 return None;
1850 }
1851 Some(s.clone())
1852 } else {
1853 None
1854 }
1855 })
1856 .collect();
1857
1858 // Check if we need to reflow this list item
1859 // We check the combined content to see if it exceeds length limits
1860 let combined_content = content_lines.join(" ").trim().to_string();
1861
1862 // Helper to check if we should reflow in normalize mode
1863 let should_normalize = || {
1864 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1865 // DO normalize if it has plain text content that spans multiple lines
1866 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1867 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1868 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1869 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1870 let has_admonitions = blocks.iter().any(|b| matches!(b, Block::Admonition { .. }));
1871 let has_tables = blocks.iter().any(|b| matches!(b, Block::Table { .. }));
1872 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1873
1874 // If we have structural blocks but no paragraphs, don't normalize
1875 if (has_code_blocks
1876 || has_semantic_lines
1877 || has_snippet_lines
1878 || has_div_markers
1879 || has_admonitions
1880 || has_tables)
1881 && !has_paragraphs
1882 {
1883 return false;
1884 }
1885
1886 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1887 if has_paragraphs {
1888 // Count only paragraphs that contain at least one non-exempt line.
1889 // Paragraphs consisting entirely of link ref defs or standalone links
1890 // should not trigger normalization.
1891 let paragraph_count = blocks
1892 .iter()
1893 .filter(|b| {
1894 if let Block::Paragraph(para_lines) = b {
1895 !para_lines.iter().all(|line| is_exempt_line(line))
1896 } else {
1897 false
1898 }
1899 })
1900 .count();
1901 if paragraph_count > 1 {
1902 // Multiple non-exempt paragraph blocks should be normalized
1903 return true;
1904 }
1905
1906 // Single paragraph block: normalize if it has multiple content lines
1907 if content_lines.len() > 1 {
1908 return true;
1909 }
1910 }
1911
1912 false
1913 };
1914
1915 let needs_reflow = match config.reflow_mode {
1916 ReflowMode::Normalize => {
1917 // Only reflow if:
1918 // 1. Any non-exempt paragraph, when joined, exceeds the limit, OR
1919 // 2. Any admonition content line exceeds the limit, OR
1920 // 3. The list item should be normalized (has multi-line plain text)
1921 let any_paragraph_exceeds = blocks.iter().any(|block| match block {
1922 Block::Paragraph(para_lines) => {
1923 if para_lines.iter().all(|line| is_exempt_line(line)) {
1924 return false;
1925 }
1926 let joined = para_lines.join(" ");
1927 let with_marker = format!("{}{}", " ".repeat(indent_size), joined.trim());
1928 self.calculate_effective_length(&with_marker) > config.line_length.get()
1929 }
1930 Block::Admonition {
1931 content_lines,
1932 header_indent,
1933 ..
1934 } => content_lines.iter().any(|(content, indent)| {
1935 if content.is_empty() {
1936 return false;
1937 }
1938 let with_indent = format!("{}{}", " ".repeat(*indent.max(header_indent)), content);
1939 self.calculate_effective_length(&with_indent) > config.line_length.get()
1940 }),
1941 _ => false,
1942 });
1943 if any_paragraph_exceeds {
1944 true
1945 } else {
1946 should_normalize()
1947 }
1948 }
1949 ReflowMode::SentencePerLine => {
1950 // Check if list item has multiple sentences
1951 let sentences = split_into_sentences(&combined_content);
1952 sentences.len() > 1
1953 }
1954 ReflowMode::SemanticLineBreaks => {
1955 let sentences = split_into_sentences(&combined_content);
1956 sentences.len() > 1
1957 || (list_start..i).any(|line_idx| {
1958 let line = lines[line_idx];
1959 let trimmed = line.trim();
1960 if trimmed.is_empty() || is_exempt_line(line) {
1961 return false;
1962 }
1963 self.calculate_effective_length(line) > config.line_length.get()
1964 })
1965 }
1966 ReflowMode::Default => {
1967 // In default mode, only reflow if any individual non-exempt line exceeds limit
1968 (list_start..i).any(|line_idx| {
1969 let line = lines[line_idx];
1970 let trimmed = line.trim();
1971 // Skip blank lines and exempt lines
1972 if trimmed.is_empty() || is_exempt_line(line) {
1973 return false;
1974 }
1975 self.calculate_effective_length(line) > config.line_length.get()
1976 })
1977 }
1978 };
1979
1980 if needs_reflow {
1981 let start_range = line_index.whole_line_range(list_start + 1);
1982 let end_line = i - 1;
1983 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1984 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1985 } else {
1986 line_index.whole_line_range(end_line + 1)
1987 };
1988 let byte_range = start_range.start..end_range.end;
1989
1990 // Reflow each block (paragraphs only, preserve code blocks)
1991 // When line_length = 0 (no limit), use a very large value for reflow
1992 let reflow_line_length = if config.line_length.is_unlimited() {
1993 usize::MAX
1994 } else {
1995 config.line_length.get().saturating_sub(indent_size).max(1)
1996 };
1997 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1998 line_length: reflow_line_length,
1999 break_on_sentences: true,
2000 preserve_breaks: false,
2001 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2002 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2003 abbreviations: config.abbreviations_for_reflow(),
2004 length_mode: self.reflow_length_mode(),
2005 attr_lists: ctx.flavor.supports_attr_lists(),
2006 require_sentence_capital: config.require_sentence_capital,
2007 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2008 Some(4)
2009 } else {
2010 None
2011 },
2012 };
2013
2014 let mut result: Vec<String> = Vec::new();
2015 let mut is_first_block = true;
2016
2017 for (block_idx, block) in blocks.iter().enumerate() {
2018 match block {
2019 Block::Paragraph(para_lines) => {
2020 // If every line in this paragraph is exempt (link ref defs,
2021 // standalone links), preserve the paragraph verbatim instead
2022 // of reflowing it. Reflowing would corrupt link ref defs.
2023 let all_exempt = para_lines.iter().all(|line| is_exempt_line(line));
2024
2025 if all_exempt {
2026 for (idx, line) in para_lines.iter().enumerate() {
2027 if is_first_block && idx == 0 {
2028 result.push(format!("{marker}{line}"));
2029 is_first_block = false;
2030 } else {
2031 result.push(format!("{expected_indent}{line}"));
2032 }
2033 }
2034 } else {
2035 // Split the paragraph into segments at hard break boundaries
2036 // Each segment can be reflowed independently
2037 let segments = split_into_segments(para_lines);
2038
2039 for (segment_idx, segment) in segments.iter().enumerate() {
2040 // Check if this segment ends with a hard break and what type
2041 let hard_break_type = segment.last().and_then(|line| {
2042 let line = line.strip_suffix('\r').unwrap_or(line);
2043 if line.ends_with('\\') {
2044 Some("\\")
2045 } else if line.ends_with(" ") {
2046 Some(" ")
2047 } else {
2048 None
2049 }
2050 });
2051
2052 // Join and reflow the segment (removing the hard break marker for processing)
2053 let segment_for_reflow: Vec<String> = segment
2054 .iter()
2055 .map(|line| {
2056 // Strip hard break marker (2 spaces or backslash) for reflow processing
2057 if line.ends_with('\\') {
2058 line[..line.len() - 1].trim_end().to_string()
2059 } else if line.ends_with(" ") {
2060 line[..line.len() - 2].trim_end().to_string()
2061 } else {
2062 line.clone()
2063 }
2064 })
2065 .collect();
2066
2067 let segment_text = segment_for_reflow.join(" ").trim().to_string();
2068 if !segment_text.is_empty() {
2069 let reflowed =
2070 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
2071
2072 if is_first_block && segment_idx == 0 {
2073 // First segment of first block starts with marker
2074 result.push(format!("{marker}{}", reflowed[0]));
2075 for line in reflowed.iter().skip(1) {
2076 result.push(format!("{expected_indent}{line}"));
2077 }
2078 is_first_block = false;
2079 } else {
2080 // Subsequent segments
2081 for line in reflowed {
2082 result.push(format!("{expected_indent}{line}"));
2083 }
2084 }
2085
2086 // If this segment had a hard break, add it back to the last line
2087 // Preserve the original hard break format (backslash or two spaces)
2088 if let Some(break_marker) = hard_break_type
2089 && let Some(last_line) = result.last_mut()
2090 {
2091 last_line.push_str(break_marker);
2092 }
2093 }
2094 }
2095 }
2096
2097 // Add blank line after paragraph block if there's a next block.
2098 // Check if next block is a code block that doesn't want a preceding blank.
2099 // Also don't add blank lines before snippet lines (they should stay tight).
2100 // Only add if not already ending with one (avoids double blanks).
2101 if block_idx < blocks.len() - 1 {
2102 let next_block = &blocks[block_idx + 1];
2103 let should_add_blank = match next_block {
2104 Block::Code {
2105 has_preceding_blank, ..
2106 } => *has_preceding_blank,
2107 Block::Table {
2108 has_preceding_blank, ..
2109 } => *has_preceding_blank,
2110 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2111 _ => true, // For all other blocks, add blank line
2112 };
2113 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2114 result.push(String::new());
2115 }
2116 }
2117 }
2118 Block::Code {
2119 lines: code_lines,
2120 has_preceding_blank: _,
2121 } => {
2122 // Preserve code blocks as-is with original indentation
2123 // NOTE: Blank line before code block is handled by the previous block
2124 // (see paragraph block's logic above)
2125
2126 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
2127 if is_first_block && idx == 0 {
2128 // First line of first block gets marker
2129 result.push(format!(
2130 "{marker}{}",
2131 " ".repeat(orig_indent - marker_len) + content
2132 ));
2133 is_first_block = false;
2134 } else if content.is_empty() {
2135 result.push(String::new());
2136 } else {
2137 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
2138 }
2139 }
2140 }
2141 Block::SemanticLine(content) => {
2142 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
2143 // Only add blank before if not already ending with one.
2144 if !is_first_block && result.last().is_none_or(|s: &String| !s.is_empty()) {
2145 result.push(String::new());
2146 }
2147
2148 if is_first_block {
2149 // First block starts with marker
2150 result.push(format!("{marker}{content}"));
2151 is_first_block = false;
2152 } else {
2153 // Subsequent blocks use expected indent
2154 result.push(format!("{expected_indent}{content}"));
2155 }
2156
2157 // Add blank line after semantic line if there's a next block.
2158 // Only add if not already ending with one.
2159 if block_idx < blocks.len() - 1 {
2160 let next_block = &blocks[block_idx + 1];
2161 let should_add_blank = match next_block {
2162 Block::Code {
2163 has_preceding_blank, ..
2164 } => *has_preceding_blank,
2165 Block::Table {
2166 has_preceding_blank, ..
2167 } => *has_preceding_blank,
2168 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2169 _ => true, // For all other blocks, add blank line
2170 };
2171 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2172 result.push(String::new());
2173 }
2174 }
2175 }
2176 Block::SnippetLine(content) => {
2177 // Preserve snippet delimiters (-8<-) as-is on their own line
2178 // Unlike semantic lines, snippet lines don't add extra blank lines
2179 if is_first_block {
2180 // First block starts with marker
2181 result.push(format!("{marker}{content}"));
2182 is_first_block = false;
2183 } else {
2184 // Subsequent blocks use expected indent
2185 result.push(format!("{expected_indent}{content}"));
2186 }
2187 // No blank lines added before or after snippet delimiters
2188 }
2189 Block::DivMarker(content) => {
2190 // Preserve div markers (::: opening or closing) as-is on their own line
2191 if is_first_block {
2192 result.push(format!("{marker}{content}"));
2193 is_first_block = false;
2194 } else {
2195 result.push(format!("{expected_indent}{content}"));
2196 }
2197 }
2198 Block::Html {
2199 lines: html_lines,
2200 has_preceding_blank: _,
2201 } => {
2202 // Preserve HTML blocks exactly as-is with original indentation
2203 // NOTE: Blank line before HTML block is handled by the previous block
2204
2205 for (idx, line) in html_lines.iter().enumerate() {
2206 if is_first_block && idx == 0 {
2207 // First line of first block gets marker
2208 result.push(format!("{marker}{line}"));
2209 is_first_block = false;
2210 } else if line.is_empty() {
2211 // Preserve blank lines inside HTML blocks
2212 result.push(String::new());
2213 } else {
2214 // Preserve lines with their original content (already includes indentation)
2215 result.push(format!("{expected_indent}{line}"));
2216 }
2217 }
2218
2219 // Add blank line after HTML block if there's a next block.
2220 // Only add if not already ending with one (avoids double blanks
2221 // when the HTML block itself contained a trailing blank line).
2222 if block_idx < blocks.len() - 1 {
2223 let next_block = &blocks[block_idx + 1];
2224 let should_add_blank = match next_block {
2225 Block::Code {
2226 has_preceding_blank, ..
2227 } => *has_preceding_blank,
2228 Block::Html {
2229 has_preceding_blank, ..
2230 } => *has_preceding_blank,
2231 Block::Table {
2232 has_preceding_blank, ..
2233 } => *has_preceding_blank,
2234 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2235 _ => true, // For all other blocks, add blank line
2236 };
2237 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2238 result.push(String::new());
2239 }
2240 }
2241 }
2242 Block::Table {
2243 lines: table_lines,
2244 has_preceding_blank: _,
2245 } => {
2246 // Preserve table rows verbatim with their original indentation.
2247 // Reflowing rows would corrupt column alignment and inject `|`
2248 // characters mid-paragraph (issue #590).
2249 // The leading blank line is emitted by the previous block.
2250 for (idx, (content, orig_indent)) in table_lines.iter().enumerate() {
2251 if is_first_block && idx == 0 {
2252 // First line of first block gets the list marker
2253 result.push(format!(
2254 "{marker}{}",
2255 " ".repeat(orig_indent.saturating_sub(marker_len)) + content
2256 ));
2257 is_first_block = false;
2258 } else {
2259 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
2260 }
2261 }
2262
2263 // Add blank line after table block if there's a next block.
2264 if block_idx < blocks.len() - 1 {
2265 let next_block = &blocks[block_idx + 1];
2266 let should_add_blank = match next_block {
2267 Block::Code {
2268 has_preceding_blank, ..
2269 } => *has_preceding_blank,
2270 Block::Html {
2271 has_preceding_blank, ..
2272 } => *has_preceding_blank,
2273 Block::Table {
2274 has_preceding_blank, ..
2275 } => *has_preceding_blank,
2276 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2277 _ => true,
2278 };
2279 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2280 result.push(String::new());
2281 }
2282 }
2283 }
2284 Block::Admonition {
2285 header,
2286 header_indent,
2287 content_lines: admon_lines,
2288 } => {
2289 // Reconstruct admonition block with header at original indent
2290 // and body content reflowed to fit within the line length limit
2291
2292 // Add blank line before admonition if not first block
2293 if !is_first_block && result.last().is_none_or(|s: &String| !s.is_empty()) {
2294 result.push(String::new());
2295 }
2296
2297 // Output the header at its original indent
2298 let header_indent_str = " ".repeat(*header_indent);
2299 if is_first_block {
2300 result.push(format!(
2301 "{marker}{}",
2302 " ".repeat(header_indent.saturating_sub(marker_len)) + header
2303 ));
2304 is_first_block = false;
2305 } else {
2306 result.push(format!("{header_indent_str}{header}"));
2307 }
2308
2309 // Derive body indent from the first non-empty content line's
2310 // stored indent, falling back to header_indent + 4 for
2311 // empty-body admonitions
2312 let body_indent = admon_lines
2313 .iter()
2314 .find(|(content, _)| !content.is_empty())
2315 .map_or(header_indent + 4, |(_, indent)| *indent);
2316 let body_indent_str = " ".repeat(body_indent);
2317
2318 // Segment body content into code blocks (verbatim) and
2319 // text paragraphs (reflowable), separated by blank lines.
2320 // Code lines store (content, orig_indent) to reconstruct
2321 // internal indentation relative to body_indent.
2322 enum AdmonSegment {
2323 Text(Vec<String>),
2324 Code(Vec<(String, usize)>),
2325 }
2326
2327 let mut segments: Vec<AdmonSegment> = Vec::new();
2328 let mut current_text: Vec<String> = Vec::new();
2329 let mut current_code: Vec<(String, usize)> = Vec::new();
2330 let mut in_admon_code = false;
2331 // Track the opening fence character so closing fences
2332 // must match (backticks close backticks, tildes close tildes)
2333 let mut fence_char: char = '`';
2334
/// Recognizes an opening code fence: a run of at least three backticks or
/// tildes (after optional leading whitespace), optionally followed by an
/// info string. Returns the fence character and the length of the run.
///
/// Per CommonMark, the info string of a backtick fence may not contain
/// backticks, so a line that merely starts with an inline code span is
/// rejected instead of opening a code segment.
fn get_opening_fence(s: &str) -> Option<(char, usize)> {
    let t = s.trim_start();
    let fence = t.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let len = t.chars().take_while(|&c| c == fence).count();
    if len < 3 {
        return None;
    }
    // Fence characters are ASCII, so `len` is also a valid byte offset.
    if fence == '`' && t[len..].contains('`') {
        return None;
    }
    Some((fence, len))
}

/// Recognizes a closing code fence: after trimming surrounding whitespace
/// the line must consist ONLY of at least three identical fence characters
/// (no info string). Returns the fence character and the length of the run.
fn get_closing_fence(s: &str) -> Option<(char, usize)> {
    let t = s.trim();
    if t.starts_with("```") && t.bytes().all(|b| b == b'`') {
        Some(('`', t.len()))
    } else if t.starts_with("~~~") && t.bytes().all(|b| b == b'~') {
        Some(('~', t.len()))
    } else {
        None
    }
}
2357 let mut fence_len: usize = 3;
2358
2359 for (content, orig_indent) in admon_lines {
2360 if in_admon_code {
2361 // Closing fence must use the same character, be
2362 // at least as long, and have no info string
2363 if let Some((ch, len)) = get_closing_fence(content)
2364 && ch == fence_char
2365 && len >= fence_len
2366 {
2367 current_code.push((content.clone(), *orig_indent));
2368 in_admon_code = false;
2369 segments.push(AdmonSegment::Code(std::mem::take(&mut current_code)));
2370 continue;
2371 }
2372 current_code.push((content.clone(), *orig_indent));
2373 } else if let Some((ch, len)) = get_opening_fence(content) {
2374 if !current_text.is_empty() {
2375 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2376 }
2377 in_admon_code = true;
2378 fence_char = ch;
2379 fence_len = len;
2380 current_code.push((content.clone(), *orig_indent));
2381 } else if content.is_empty() {
2382 if !current_text.is_empty() {
2383 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2384 }
2385 } else {
2386 current_text.push(content.clone());
2387 }
2388 }
2389 if in_admon_code && !current_code.is_empty() {
2390 segments.push(AdmonSegment::Code(std::mem::take(&mut current_code)));
2391 }
2392 if !current_text.is_empty() {
2393 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2394 }
2395
2396 // Build reflow options once for all text segments
2397 let admon_reflow_length = if config.line_length.is_unlimited() {
2398 usize::MAX
2399 } else {
2400 config.line_length.get().saturating_sub(body_indent).max(1)
2401 };
2402
2403 let admon_reflow_options = crate::utils::text_reflow::ReflowOptions {
2404 line_length: admon_reflow_length,
2405 break_on_sentences: true,
2406 preserve_breaks: false,
2407 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2408 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2409 abbreviations: config.abbreviations_for_reflow(),
2410 length_mode: self.reflow_length_mode(),
2411 attr_lists: ctx.flavor.supports_attr_lists(),
2412 require_sentence_capital: config.require_sentence_capital,
2413 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2414 Some(4)
2415 } else {
2416 None
2417 },
2418 };
2419
2420 // Output each segment
2421 for segment in &segments {
2422 // Blank line before each segment (after the header or previous segment)
2423 result.push(String::new());
2424
2425 match segment {
2426 AdmonSegment::Code(lines) => {
2427 for (line, orig_indent) in lines {
2428 if line.is_empty() {
2429 // Preserve blank lines inside code blocks
2430 result.push(String::new());
2431 } else {
2432 // Reconstruct with body_indent + any extra
2433 // indentation the line had beyond body_indent
2434 let extra = orig_indent.saturating_sub(body_indent);
2435 let indent_str = " ".repeat(body_indent + extra);
2436 result.push(format!("{indent_str}{line}"));
2437 }
2438 }
2439 }
2440 AdmonSegment::Text(lines) => {
2441 let paragraph_text = lines.join(" ").trim().to_string();
2442 if paragraph_text.is_empty() {
2443 continue;
2444 }
2445 let reflowed = crate::utils::text_reflow::reflow_line(
2446 ¶graph_text,
2447 &admon_reflow_options,
2448 );
2449 for line in &reflowed {
2450 result.push(format!("{body_indent_str}{line}"));
2451 }
2452 }
2453 }
2454 }
2455
2456 // Add blank line after admonition if there's a next block
2457 if block_idx < blocks.len() - 1 {
2458 let next_block = &blocks[block_idx + 1];
2459 let should_add_blank = match next_block {
2460 Block::Code {
2461 has_preceding_blank, ..
2462 } => *has_preceding_blank,
2463 Block::Table {
2464 has_preceding_blank, ..
2465 } => *has_preceding_blank,
2466 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2467 _ => true,
2468 };
2469 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2470 result.push(String::new());
2471 }
2472 }
2473 }
2474 }
2475 }
2476
2477 let reflowed_text = result.join(line_ending);
2478
2479 // Preserve trailing newline
2480 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2481 format!("{reflowed_text}{line_ending}")
2482 } else {
2483 reflowed_text
2484 };
2485
2486 // Get the original text to compare
2487 let original_text = &ctx.content[byte_range.clone()];
2488
2489 // Physical-line-length scan, shared by the Normalize-mode gate and its
2490 // message. The list-item reflow preserves code blocks, HTML blocks,
2491 // admonition headers, fence markers, semantic markers, and snippet/div
2492 // markers verbatim; only paragraph content and admonition bodies are
2493 // restructured. Only those lines drive the length warning, so that
2494 // preserved-but-overlong content does not keep the paragraph-level
2495 // warning alive when the reflow would not fix that line.
2496 let should_count_for_length = |line_idx: usize| -> bool {
2497 let line = lines[line_idx];
2498 let trimmed = line.trim();
2499 if trimmed.is_empty() || is_exempt_line(line) {
2500 return false;
2501 }
2502 let info = &ctx.lines[line_idx];
2503 if info.in_code_block || info.in_html_block {
2504 return false;
2505 }
2506 if info.in_admonition && mkdocs_admonitions::is_admonition_start(line) {
2507 return false;
2508 }
2509 if is_fence_marker(line) || is_semantic_line(line) {
2510 return false;
2511 }
2512 if is_snippet_block_delimiter(line) {
2513 return false;
2514 }
2515 if line.trim_start().starts_with(":::") {
2516 return false;
2517 }
2518 true
2519 };
2520 let max_physical_length = (list_start..i)
2521 .filter(|&idx| should_count_for_length(idx))
2522 .map(|idx| self.calculate_effective_length(lines[idx]))
2523 .max()
2524 .unwrap_or(0);
2525 // `line-length = 0` means "no limit", so no physical line can be
2526 // "over"; the message below then describes a structural join rather
2527 // than a length violation.
2528 let any_paragraph_line_over =
2529 !config.line_length.is_unlimited() && max_physical_length > config.line_length.get();
2530
2531 // Normalize mode reflows list-item prose just like paragraphs:
2532 // joining continuation lines and re-wrapping to `line-length`.
2533 // `prose_changed` is true only when the reflow alters the words or
2534 // line breaks, not when it would merely re-indent continuation
2535 // lines or trim trailing whitespace. Comparing the texts with each
2536 // line's leading and trailing whitespace removed isolates "did the
2537 // words/line breaks change" from "did the surrounding whitespace
2538 // change". Continuation indentation is MD077's responsibility and
2539 // trailing whitespace is MD009's; an MD013 warning for either would
2540 // both duplicate those rules and resurface a persistent advisory on
2541 // already-fitting items that users disable MD013 fixing to avoid.
2542 let prose_changed = {
2543 let stripped = |text: &str| text.lines().map(str::trim).collect::<Vec<_>>().join("\n");
2544 stripped(original_text) != stripped(&replacement)
2545 };
2546 // Warn when the reflow rewraps prose (the normalize feature for
2547 // list items), or when a physical line genuinely exceeds the limit
2548 // and the reflow can change something (a true length violation,
2549 // even if all that changes is the continuation indent). A line that
2550 // is already optimal in both respects produces no warning.
2551 let gate_ok = prose_changed || (any_paragraph_line_over && original_text != replacement);
2552 if gate_ok {
2553 // Generate an appropriate message based on why reflow is needed
2554 let message = match config.reflow_mode {
2555 ReflowMode::SentencePerLine => {
2556 let num_sentences = split_into_sentences(&combined_content).len();
2557 let num_lines = content_lines.len();
2558 if num_lines == 1 {
2559 // Single line with multiple sentences
2560 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2561 } else {
2562 // Multiple lines - could be split sentences or mixed
2563 format!(
2564 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
2565 )
2566 }
2567 }
2568 ReflowMode::SemanticLineBreaks => {
2569 let num_sentences = split_into_sentences(&combined_content).len();
2570 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
2571 }
2572 ReflowMode::Normalize => {
2573 // When a physical line genuinely exceeds the limit, report
2574 // it as a length violation. Otherwise the reflow is a
2575 // structural normalization (joining/re-wrapping multi-line
2576 // content that already fits), mirroring the paragraph path.
2577 if any_paragraph_line_over {
2578 format!(
2579 "Line length {} exceeds {} characters",
2580 max_physical_length,
2581 config.line_length.get()
2582 )
2583 } else {
2584 format!(
2585 "List item could be normalized to use line length of {} characters",
2586 config.line_length.get()
2587 )
2588 }
2589 }
2590 ReflowMode::Default => {
2591 // Report the actual longest non-exempt line, not the combined content
2592 let max_length = (list_start..i)
2593 .filter(|&line_idx| {
2594 let line = lines[line_idx];
2595 let trimmed = line.trim();
2596 !trimmed.is_empty() && !is_exempt_line(line)
2597 })
2598 .map(|line_idx| self.calculate_effective_length(lines[line_idx]))
2599 .max()
2600 .unwrap_or(0);
2601 format!(
2602 "Line length {} exceeds {} characters",
2603 max_length,
2604 config.line_length.get()
2605 )
2606 }
2607 };
2608
2609 warnings.push(LintWarning {
2610 rule_name: Some(self.name().to_string()),
2611 message,
2612 line: list_start + 1,
2613 column: 1,
2614 end_line: end_line + 1,
2615 end_column: lines[end_line].len() + 1,
2616 severity: Severity::Warning,
2617 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
2618 });
2619 }
2620 }
2621 continue;
2622 }
2623
2624 // Found start of a paragraph - collect all lines in it
2625 let paragraph_start = i;
2626 let mut paragraph_lines = vec![lines[i]];
2627 i += 1;
2628
2629 while i < lines.len() {
2630 let next_line = lines[i];
2631 let next_line_num = i + 1;
2632 let next_trimmed = next_line.trim();
2633
2634 // Stop at paragraph boundaries
2635 if next_trimmed.is_empty()
2636 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
2637 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
2638 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
2639 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
2640 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
2641 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
2642 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_block)
2643 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
2644 || ctx
2645 .line_info(next_line_num)
2646 .is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container)
2647 || (next_line_num > 0
2648 && next_line_num <= ctx.lines.len()
2649 && ctx.lines[next_line_num - 1].blockquote.is_some())
2650 || next_trimmed.starts_with('#')
2651 || TableUtils::is_potential_table_row(next_line)
2652 || is_list_item(next_trimmed)
2653 || is_horizontal_rule(next_line)
2654 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
2655 || is_template_directive_only(next_line)
2656 || is_standalone_attr_list(next_line)
2657 || is_snippet_block_delimiter(next_line)
2658 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
2659 || is_html_only_line(next_line)
2660 {
2661 break;
2662 }
2663
2664 // Check if the previous line ends with a hard break (2+ spaces or backslash)
2665 if i > 0 && has_hard_break(lines[i - 1]) {
2666 // Don't include lines after hard breaks in the same paragraph
2667 break;
2668 }
2669
2670 paragraph_lines.push(next_line);
2671 i += 1;
2672 }
2673
2674 // Compute the common leading indent of all non-empty paragraph lines,
2675 // but only when those lines are structurally inside a list block.
2676 // Indented continuation lines that follow a nested list arrive here
2677 // with their structural indentation intact (e.g. 2 spaces for a
2678 // top-level list item). Stripping the indent before reflow and
2679 // re-applying it afterward prevents the fixer from moving those
2680 // lines to column 0.
2681 //
2682 // The list-block guard is essential: top-level paragraphs that happen
2683 // to start with spaces (insignificant in Markdown) must NOT have those
2684 // spaces preserved or injected by the fixer.
2685 let common_indent: String = if ctx.is_in_list_block(paragraph_start + 1) {
2686 let min_len = paragraph_lines
2687 .iter()
2688 .filter(|l| !l.trim().is_empty())
2689 .map(|l| l.len() - l.trim_start().len())
2690 .min()
2691 .unwrap_or(0);
2692 paragraph_lines
2693 .iter()
2694 .find(|l| !l.trim().is_empty())
2695 .map(|l| l[..min_len].to_string())
2696 .unwrap_or_default()
2697 } else {
2698 String::new()
2699 };
2700
2701 // Combine paragraph lines into a single string for processing.
2702 // This must be done BEFORE the needs_reflow check for sentence-per-line mode.
2703 let paragraph_text = if common_indent.is_empty() {
2704 paragraph_lines.join(" ")
2705 } else {
2706 paragraph_lines
2707 .iter()
2708 .map(|l| {
2709 if l.starts_with(common_indent.as_str()) {
2710 &l[common_indent.len()..]
2711 } else {
2712 l.trim_start()
2713 }
2714 })
2715 .collect::<Vec<_>>()
2716 .join(" ")
2717 };
2718
2719 // Skip reflowing if this paragraph contains definition list items
2720 // Definition lists are multi-line structures that should not be joined
2721 let contains_definition_list = paragraph_lines
2722 .iter()
2723 .any(|line| crate::utils::is_definition_list_item(line));
2724
2725 if contains_definition_list {
2726 // Don't reflow definition lists - skip this paragraph
2727 i = paragraph_start + paragraph_lines.len();
2728 continue;
2729 }
2730
2731 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2732 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2733 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2734
2735 if contains_snippets {
2736 // Don't reflow Snippets blocks - skip this paragraph
2737 i = paragraph_start + paragraph_lines.len();
2738 continue;
2739 }
2740
2741 // Check if this paragraph needs reflowing
2742 let needs_reflow = match config.reflow_mode {
2743 ReflowMode::Normalize => self.normalize_mode_needs_reflow(paragraph_lines.iter().copied(), config),
2744 ReflowMode::SentencePerLine => {
2745 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
2746 // Note: we check the joined text because sentences can span multiple lines
2747 let sentences = split_into_sentences(¶graph_text);
2748
2749 // Always reflow if multiple sentences on one line
2750 if sentences.len() > 1 {
2751 true
2752 } else if paragraph_lines.len() > 1 {
2753 // For single-sentence paragraphs spanning multiple lines:
2754 // Reflow if they COULD fit on one line (respecting line-length constraint)
2755 if config.line_length.is_unlimited() {
2756 // No line-length constraint - always join single sentences
2757 true
2758 } else {
2759 // Only join if it fits within line-length.
2760 // paragraph_text has the common indent stripped, so add it
2761 // back to get the true output length before comparing.
2762 let effective_length =
2763 self.calculate_effective_length(¶graph_text) + common_indent.len();
2764 effective_length <= config.line_length.get()
2765 }
2766 } else {
2767 false
2768 }
2769 }
2770 ReflowMode::SemanticLineBreaks => {
2771 let sentences = split_into_sentences(¶graph_text);
2772 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
2773 sentences.len() > 1
2774 || paragraph_lines.len() > 1
2775 || paragraph_lines
2776 .iter()
2777 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2778 }
2779 ReflowMode::Default => {
2780 // In default mode, only reflow if lines exceed limit
2781 paragraph_lines
2782 .iter()
2783 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2784 }
2785 };
2786
2787 if needs_reflow {
2788 // Calculate byte range for this paragraph
2789 // Use whole_line_range for each line and combine
2790 let start_range = line_index.whole_line_range(paragraph_start + 1);
2791 let end_line = paragraph_start + paragraph_lines.len() - 1;
2792
2793 // For the last line, we want to preserve any trailing newline
2794 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2795 // Last line without trailing newline - use line_text_range
2796 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2797 } else {
2798 // Not the last line or has trailing newline - use whole_line_range
2799 line_index.whole_line_range(end_line + 1)
2800 };
2801
2802 let byte_range = start_range.start..end_range.end;
2803
2804 // Check if the paragraph ends with a hard break and what type
2805 let hard_break_type = paragraph_lines.last().and_then(|line| {
2806 let line = line.strip_suffix('\r').unwrap_or(line);
2807 if line.ends_with('\\') {
2808 Some("\\")
2809 } else if line.ends_with(" ") {
2810 Some(" ")
2811 } else {
2812 None
2813 }
2814 });
2815
2816 // Reflow the paragraph
2817 // When line_length = 0 (no limit), use a very large value for reflow
2818 let reflow_line_length = if config.line_length.is_unlimited() {
2819 usize::MAX
2820 } else {
2821 config.line_length.get()
2822 };
2823 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2824 line_length: reflow_line_length,
2825 break_on_sentences: true,
2826 preserve_breaks: false,
2827 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2828 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2829 abbreviations: config.abbreviations_for_reflow(),
2830 length_mode: self.reflow_length_mode(),
2831 attr_lists: ctx.flavor.supports_attr_lists(),
2832 require_sentence_capital: config.require_sentence_capital,
2833 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2834 Some(4)
2835 } else {
2836 None
2837 },
2838 };
2839 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
2840
2841 // Re-apply the common indent to each non-empty reflowed line so
2842 // that the replacement preserves the original structural indentation.
2843 if !common_indent.is_empty() {
2844 for line in &mut reflowed {
2845 if !line.is_empty() {
2846 *line = format!("{common_indent}{line}");
2847 }
2848 }
2849 }
2850
2851 // If the original paragraph ended with a hard break, preserve it
2852 // Preserve the original hard break format (backslash or two spaces)
2853 if let Some(break_marker) = hard_break_type
2854 && !reflowed.is_empty()
2855 {
2856 let last_idx = reflowed.len() - 1;
2857 if !has_hard_break(&reflowed[last_idx]) {
2858 reflowed[last_idx].push_str(break_marker);
2859 }
2860 }
2861
2862 let reflowed_text = reflowed.join(line_ending);
2863
2864 // Preserve trailing newline if the original paragraph had one
2865 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2866 format!("{reflowed_text}{line_ending}")
2867 } else {
2868 reflowed_text
2869 };
2870
2871 // Get the original text to compare
2872 let original_text = &ctx.content[byte_range.clone()];
2873
2874 // Only generate a warning if the replacement is different from the original
2875 if original_text != replacement {
2876 // Create warning with actual fix
2877 // In default mode, report the specific line that violates
2878 // In normalize mode, report the whole paragraph
2879 // In sentence-per-line mode, report the entire paragraph
2880 let (warning_line, warning_end_line) = match config.reflow_mode {
2881 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
2882 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
2883 // Highlight the entire paragraph that needs reformatting
2884 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
2885 }
2886 ReflowMode::Default => {
2887 // Find the first line that exceeds the limit
2888 let mut violating_line = paragraph_start;
2889 for (idx, line) in paragraph_lines.iter().enumerate() {
2890 if self.calculate_effective_length(line) > config.line_length.get() {
2891 violating_line = paragraph_start + idx;
2892 break;
2893 }
2894 }
2895 (violating_line + 1, violating_line + 1)
2896 }
2897 };
2898
2899 warnings.push(LintWarning {
2900 rule_name: Some(self.name().to_string()),
2901 message: match config.reflow_mode {
2902 ReflowMode::Normalize => format!(
2903 "Paragraph could be normalized to use line length of {} characters",
2904 config.line_length.get()
2905 ),
2906 ReflowMode::SentencePerLine => {
2907 let num_sentences = split_into_sentences(¶graph_text).len();
2908 if paragraph_lines.len() == 1 {
2909 // Single line with multiple sentences
2910 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2911 } else {
2912 let num_lines = paragraph_lines.len();
2913 // Multiple lines - could be split sentences or mixed
2914 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
2915 }
2916 },
2917 ReflowMode::SemanticLineBreaks => {
2918 let num_sentences = split_into_sentences(¶graph_text).len();
2919 format!(
2920 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
2921 )
2922 },
2923 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
2924 },
2925 line: warning_line,
2926 column: 1,
2927 end_line: warning_end_line,
2928 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
2929 severity: Severity::Warning,
2930 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
2931 });
2932 }
2933 }
2934 }
2935
2936 warnings
2937 }
2938
2939 /// Calculate string length based on the configured length mode
2940 fn calculate_string_length(&self, s: &str) -> usize {
2941 match self.config.length_mode {
2942 LengthMode::Chars => s.chars().count(),
2943 LengthMode::Visual => s.width(),
2944 LengthMode::Bytes => s.len(),
2945 }
2946 }
2947
2948 /// Calculate effective line length
2949 ///
2950 /// Returns the actual display length of the line using the configured length mode.
2951 fn calculate_effective_length(&self, line: &str) -> usize {
2952 self.calculate_string_length(line)
2953 }
2954
2955 /// Calculate line length with inline link/image URLs removed.
2956 ///
2957 /// For each inline link `[text](url)` or image `` on the line,
2958 /// computes the "savings" from removing the URL portion (keeping only `[text]`
2959 /// or `![alt]`). Returns `effective_length - total_savings`.
2960 ///
2961 /// Handles nested constructs (e.g., `[](url)`) by only counting the
2962 /// outermost construct to avoid double-counting.
2963 fn calculate_text_only_length(
2964 &self,
2965 effective_length: usize,
2966 line_number: usize,
2967 ctx: &crate::lint_context::LintContext,
2968 ) -> usize {
2969 let line_range = ctx.line_index.line_content_range(line_number);
2970 let line_byte_end = line_range.end;
2971
2972 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
2973 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
2974
2975 // Binary search: links are sorted by byte_offset, so link.line is non-decreasing
2976 let link_start = ctx.links.partition_point(|l| l.line < line_number);
2977 for link in &ctx.links[link_start..] {
2978 if link.line != line_number {
2979 break;
2980 }
2981 if link.is_reference {
2982 continue;
2983 }
2984 if !matches!(link.link_type, LinkType::Inline) {
2985 continue;
2986 }
2987 if link.byte_end > line_byte_end {
2988 continue;
2989 }
2990 let text_only_len = 2 + self.calculate_string_length(&link.text);
2991 constructs.push((link.byte_offset, link.byte_end, text_only_len));
2992 }
2993
2994 let img_start = ctx.images.partition_point(|i| i.line < line_number);
2995 for image in &ctx.images[img_start..] {
2996 if image.line != line_number {
2997 break;
2998 }
2999 if image.is_reference {
3000 continue;
3001 }
3002 if !matches!(image.link_type, LinkType::Inline) {
3003 continue;
3004 }
3005 if image.byte_end > line_byte_end {
3006 continue;
3007 }
3008 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
3009 constructs.push((image.byte_offset, image.byte_end, text_only_len));
3010 }
3011
3012 if constructs.is_empty() {
3013 return effective_length;
3014 }
3015
3016 // Sort by byte offset to handle overlapping/nested constructs
3017 constructs.sort_by_key(|&(start, _, _)| start);
3018
3019 let mut total_savings: usize = 0;
3020 let mut last_end: usize = 0;
3021
3022 for (start, end, text_only_len) in &constructs {
3023 // Skip constructs nested inside a previously counted one
3024 if *start < last_end {
3025 continue;
3026 }
3027 // Full construct length in configured length mode
3028 let full_source = &ctx.content[*start..*end];
3029 let full_len = self.calculate_string_length(full_source);
3030 total_savings += full_len.saturating_sub(*text_only_len);
3031 last_end = *end;
3032 }
3033
3034 effective_length.saturating_sub(total_savings)
3035 }
3036}