rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod block_builder;
22mod helpers;
23pub mod md013_config;
24use crate::utils::is_template_directive_only;
25use block_builder::{Block, BlockBuilder};
26use helpers::{
27 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_html_only_line,
28 is_list_item, is_standalone_link_or_image_line, is_unwrappable_line, split_into_segments,
29 trim_preserving_hard_break,
30};
31pub use md013_config::MD013Config;
32use md013_config::{LengthMode, ReflowMode};
33
34#[cfg(test)]
35mod tests;
36use unicode_width::UnicodeWidthStr;
37
38#[derive(Clone, Default)]
39pub struct MD013LineLength {
40 pub(crate) config: MD013Config,
41}
42
43/// Blockquote paragraph line collected for reflow, with original line index for range computation.
44struct CollectedBlockquoteLine {
45 line_idx: usize,
46 data: BlockquoteLineData,
47}
48
49impl MD013LineLength {
50 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
51 Self {
52 config: MD013Config {
53 line_length: crate::types::LineLength::new(line_length),
54 code_blocks,
55 tables,
56 headings,
57 paragraphs: true, // Default to true for backwards compatibility
58 blockquotes: true, // Default to true for backwards compatibility
59 strict,
60 stern: false,
61 heading_line_length: None,
62 code_block_line_length: None,
63 reflow: false,
64 reflow_mode: ReflowMode::default(),
65 length_mode: LengthMode::default(),
66 abbreviations: Vec::new(),
67 require_sentence_capital: true,
68 },
69 }
70 }
71
72 pub fn from_config_struct(config: MD013Config) -> Self {
73 Self { config }
74 }
75
76 /// Return a clone with code block checking disabled.
77 /// Used for doc comment linting where code blocks are Rust code managed by rustfmt.
78 pub fn with_code_blocks_disabled(&self) -> Self {
79 let mut clone = self.clone();
80 clone.config.code_blocks = false;
81 clone
82 }
83
84 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
85 fn reflow_length_mode(&self) -> ReflowLengthMode {
86 match self.config.length_mode {
87 LengthMode::Chars => ReflowLengthMode::Chars,
88 LengthMode::Visual => ReflowLengthMode::Visual,
89 LengthMode::Bytes => ReflowLengthMode::Bytes,
90 }
91 }
92
93 fn should_ignore_line(
94 &self,
95 line: &str,
96 _lines: &[&str],
97 current_line: usize,
98 ctx: &crate::lint_context::LintContext,
99 ) -> bool {
100 if self.config.strict {
101 return false;
102 }
103
104 // Quick check for common patterns before expensive regex
105 let trimmed = line.trim();
106
107 // Only skip if the entire line is a URL (quick check first)
108 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
109 return true;
110 }
111
112 // Only skip if the entire line is an image reference (quick check first)
113 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
114 return true;
115 }
116
117 // Note: link reference definitions are handled as always-exempt (even in strict mode)
118 // in the main check loop, so they don't need to be checked here.
119
120 // Code blocks with long strings (only check if in code block)
121 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
122 && !trimmed.is_empty()
123 && !line.contains(' ')
124 && !line.contains('\t')
125 {
126 return true;
127 }
128
129 false
130 }
131
132 /// Check if rule should skip based on provided config (used for inline config support)
133 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
134 // Skip if content is empty
135 if ctx.content.is_empty() {
136 return true;
137 }
138
139 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
140 if config.reflow
141 && (config.reflow_mode == ReflowMode::SentencePerLine
142 || config.reflow_mode == ReflowMode::SemanticLineBreaks
143 || config.reflow_mode == ReflowMode::Normalize)
144 {
145 return false;
146 }
147
148 // Use the smallest applicable budget across line/heading/code-block
149 // contexts so a stricter context-specific limit doesn't get masked by
150 // the document-wide budget.
151 let min_limit = config.min_effective_line_length();
152 if min_limit.is_unlimited() {
153 return true;
154 }
155 let min_limit_bytes = min_limit.get();
156
157 // Quick check: if total content is shorter than the smallest line limit,
158 // definitely skip.
159 if ctx.content.len() <= min_limit_bytes {
160 return true;
161 }
162
163 // Skip if no line exceeds the smallest applicable limit.
164 !ctx.lines.iter().any(|line| line.byte_len > min_limit_bytes)
165 }
166
167 fn normalize_mode_needs_reflow<'a, I>(&self, lines: I, config: &MD013Config) -> bool
168 where
169 I: IntoIterator<Item = &'a str>,
170 {
171 let mut line_count = 0;
172 let check_length = !config.line_length.is_unlimited();
173
174 for line in lines {
175 line_count += 1;
176 if check_length && self.calculate_effective_length(line) > config.line_length.get() {
177 return true;
178 }
179 }
180
181 line_count > 1
182 }
183}
184
185impl Rule for MD013LineLength {
186 fn name(&self) -> &'static str {
187 "MD013"
188 }
189
190 fn description(&self) -> &'static str {
191 "Line length should not be excessive"
192 }
193
194 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
195 // Use pre-parsed inline config from LintContext
196 let config_override = ctx.inline_config().get_rule_config("MD013");
197
198 // Apply configuration override if present
199 let effective_config = if let Some(json_config) = config_override {
200 if let Some(obj) = json_config.as_object() {
201 let mut config = self.config.clone();
202 if let Some(line_length) = obj.get("line_length").and_then(serde_json::Value::as_u64) {
203 config.line_length = crate::types::LineLength::new(line_length as usize);
204 }
205 if let Some(code_blocks) = obj.get("code_blocks").and_then(serde_json::Value::as_bool) {
206 config.code_blocks = code_blocks;
207 }
208 if let Some(tables) = obj.get("tables").and_then(serde_json::Value::as_bool) {
209 config.tables = tables;
210 }
211 if let Some(headings) = obj.get("headings").and_then(serde_json::Value::as_bool) {
212 config.headings = headings;
213 }
214 if let Some(blockquotes) = obj.get("blockquotes").and_then(serde_json::Value::as_bool) {
215 config.blockquotes = blockquotes;
216 }
217 if let Some(strict) = obj.get("strict").and_then(serde_json::Value::as_bool) {
218 config.strict = strict;
219 }
220 if let Some(reflow) = obj.get("reflow").and_then(serde_json::Value::as_bool) {
221 config.reflow = reflow;
222 }
223 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
224 config.reflow_mode = match reflow_mode {
225 "default" => ReflowMode::Default,
226 "normalize" => ReflowMode::Normalize,
227 "sentence-per-line" => ReflowMode::SentencePerLine,
228 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
229 _ => ReflowMode::default(),
230 };
231 }
232 config
233 } else {
234 self.config.clone()
235 }
236 } else {
237 self.config.clone()
238 };
239
240 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
241 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
242 if self.should_skip_with_config(ctx, &effective_config)
243 && !(effective_config.reflow
244 && (effective_config.reflow_mode == ReflowMode::Normalize
245 || effective_config.reflow_mode == ReflowMode::SentencePerLine
246 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
247 {
248 return Ok(Vec::new());
249 }
250
251 // Direct implementation without DocumentStructure
252 let mut warnings = Vec::new();
253
254 // Special handling: line_length = 0 means "no line length limit"
255 // Skip all line length checks, but still allow reflow if enabled
256 let skip_length_checks = effective_config.line_length.is_unlimited();
257
258 // Pre-filter lines that could be problematic to avoid processing all lines.
259 // Use the smallest applicable budget across line/heading/code-block contexts
260 // so candidates aren't dropped when a stricter context-specific budget applies.
261 let prefilter_limit = effective_config.min_effective_line_length();
262 let prefilter_skip = prefilter_limit.is_unlimited();
263 let mut candidate_lines = Vec::new();
264 if !skip_length_checks && !prefilter_skip {
265 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
266 // Skip front matter - it should never be linted
267 if line_info.in_front_matter {
268 continue;
269 }
270
271 // Quick length check first
272 if line_info.byte_len > prefilter_limit.get() {
273 candidate_lines.push(line_idx);
274 }
275 }
276 }
277
278 // If no candidate lines and not in normalize or sentence-per-line mode, early return
279 if candidate_lines.is_empty()
280 && !(effective_config.reflow
281 && (effective_config.reflow_mode == ReflowMode::Normalize
282 || effective_config.reflow_mode == ReflowMode::SentencePerLine
283 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
284 {
285 return Ok(warnings);
286 }
287
288 let lines = ctx.raw_lines();
289
290 // Create a quick lookup set for heading lines
291 // We need this for both the heading skip check AND the paragraphs check
292 let heading_lines_set: std::collections::HashSet<usize> = ctx
293 .lines
294 .iter()
295 .enumerate()
296 .filter(|(_, line)| line.heading.is_some())
297 .map(|(idx, _)| idx + 1)
298 .collect();
299
300 // Use pre-computed table blocks from context
301 // We need this for both the table skip check AND the paragraphs check
302 let table_blocks = &ctx.table_blocks;
303 let mut table_lines_set = std::collections::HashSet::new();
304 for table in table_blocks {
305 table_lines_set.insert(table.header_line + 1);
306 table_lines_set.insert(table.delimiter_line + 1);
307 for &line in &table.content_lines {
308 table_lines_set.insert(line + 1);
309 }
310 }
311
312 // Process candidate lines for line length checks
313 'line_loop: for &line_idx in &candidate_lines {
314 let line_number = line_idx + 1;
315 let line = lines[line_idx];
316
317 // Calculate actual line length (used in warning messages)
318 let effective_length = self.calculate_effective_length(line);
319
320 // Pick the context-specific limit: heading > code-block > paragraph.
321 // Headings dominate over code-block context if a setext underline ever
322 // overlaps a fenced range (defensive — these are mutually exclusive in
323 // practice, but the explicit ordering documents intent).
324 let is_heading_line = heading_lines_set.contains(&line_number);
325 let in_code_block = ctx.line_info(line_number).is_some_and(|info| info.in_code_block);
326 let line_limit = if is_heading_line {
327 effective_config.effective_heading_line_length().get()
328 } else if in_code_block {
329 effective_config.effective_code_block_line_length().get()
330 } else {
331 effective_config.line_length.get()
332 };
333
334 // A context-specific limit of 0 means "unlimited for this context".
335 if line_limit == 0 {
336 continue;
337 }
338
339 // Stern mode: like default, but the trailing-token forgiveness is
340 // disabled — a line with whitespace that exceeds the limit is a
341 // violation even if the excess is the final token. The "unwrappable"
342 // line exemption (single token, optionally prefixed by # or >) is
343 // still honored. Strict overrides stern entirely.
344 if effective_config.stern && !effective_config.strict && is_unwrappable_line(line) {
345 continue;
346 }
347
348 // Trailing-token forgiveness: only in default mode (not strict, not stern).
349 // If the line only exceeds the limit because of a long token at the end
350 // (URL, link chain, identifier), it passes. This matches markdownlint's
351 // behavior: line.replace(/\S*$/u, "#")
352 let check_length = if effective_config.strict || effective_config.stern {
353 effective_length
354 } else {
355 match line.rfind(char::is_whitespace) {
356 Some(pos) => {
357 let ws_char = line[pos..].chars().next().unwrap();
358 let prefix_end = pos + ws_char.len_utf8();
359 self.calculate_string_length(&line[..prefix_end]) + 1
360 }
361 None => 1, // No whitespace — entire line is a single token
362 }
363 };
364
365 // Skip lines where the check length is within the limit
366 if check_length <= line_limit {
367 continue;
368 }
369
370 // Semantic link understanding: suppress when excess comes entirely from inline URLs
371 if !effective_config.strict {
372 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
373 if text_only_length <= line_limit {
374 continue;
375 }
376 }
377
378 // Skip mkdocstrings and pymdown blocks (already handled by LintContext)
379 if ctx.lines[line_idx].in_mkdocstrings || ctx.lines[line_idx].in_pymdown_block {
380 continue;
381 }
382
383 // Link reference definitions are always exempt, even in strict mode.
384 // There's no way to shorten them without breaking the URL.
385 // Also check after stripping list markers, since list items may
386 // contain link ref defs as their content.
387 {
388 let trimmed = line.trim();
389 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
390 continue;
391 }
392 if is_list_item(trimmed) {
393 let (_, content) = extract_list_marker_and_content(trimmed);
394 let content_trimmed = content.trim();
395 if content_trimmed.starts_with('[')
396 && content_trimmed.contains("]:")
397 && LINK_REF_PATTERN.is_match(content_trimmed)
398 {
399 continue;
400 }
401 }
402 }
403
404 // Skip various block types efficiently
405 if !effective_config.strict {
406 // Lines whose only content is a link/image are exempt.
407 // After stripping list markers, blockquote markers, and emphasis,
408 // if only a link or image remains, there is no way to shorten it.
409 if is_standalone_link_or_image_line(line) {
410 continue;
411 }
412
413 // Lines consisting entirely of HTML tags are exempt.
414 // Badge lines, images with attributes, and similar inline HTML
415 // are long due to URLs in attributes and can't be meaningfully shortened.
416 if is_html_only_line(line) {
417 continue;
418 }
419
420 // Skip setext heading underlines
421 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
422 continue;
423 }
424
425 // Skip block elements according to config flags
426 // The flags mean: true = check these elements, false = skip these elements
427 // So we skip when the flag is FALSE and the line is in that element type
428 if (!effective_config.headings && heading_lines_set.contains(&line_number))
429 || (!effective_config.code_blocks
430 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
431 || (!effective_config.tables && table_lines_set.contains(&line_number))
432 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
433 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
434 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
435 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
436 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_block)
437 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
438 || ctx.line_info(line_number).is_some_and(|info| info.in_pymdown_block)
439 {
440 continue;
441 }
442
443 // Check if this is a paragraph/regular text line
444 // If paragraphs = false, skip lines that are NOT in special blocks
445 // Blockquote content is treated as paragraph text, so it's not
446 // included in the special blocks list here.
447 if !effective_config.paragraphs {
448 let is_special_block = heading_lines_set.contains(&line_number)
449 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
450 || table_lines_set.contains(&line_number)
451 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
452 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
453 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
454 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
455 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_block)
456 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
457 || ctx
458 .line_info(line_number)
459 .is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container);
460
461 // Skip regular paragraph text when paragraphs = false
462 if !is_special_block {
463 continue;
464 }
465 }
466
467 // Skip blockquote lines when blockquotes = false.
468 // Also skip lazy continuation lines that belong to a blockquote
469 // (lines without `>` prefix that follow a blockquote line).
470 if !effective_config.blockquotes {
471 if ctx.lines[line_number - 1].blockquote.is_some() {
472 continue;
473 }
474 // Check for lazy continuation: scan backwards through
475 // non-blank lines to find if this paragraph started with
476 // a blockquote marker
477 if !line.trim().is_empty() {
478 let mut scan = line_number.saturating_sub(2);
479 loop {
480 if ctx.lines[scan].blockquote.is_some() {
481 // Found a blockquote ancestor — this is a lazy continuation
482 continue 'line_loop;
483 }
484 if lines[scan].trim().is_empty() || scan == 0 {
485 break;
486 }
487 scan -= 1;
488 }
489 }
490 }
491
492 // Skip lines that are only a URL, image ref, or link ref
493 if self.should_ignore_line(line, lines, line_idx, ctx) {
494 continue;
495 }
496 }
497
498 // In sentence-per-line mode, check if this is a single long sentence
499 // If so, emit a warning without a fix (user must manually rephrase)
500 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
501 let sentences = split_into_sentences(line.trim());
502 if sentences.len() == 1 {
503 // Single sentence that's too long - warn but don't auto-fix
504 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
505
506 let (start_line, start_col, end_line, end_col) =
507 calculate_excess_range(line_number, line, line_limit);
508
509 warnings.push(LintWarning {
510 rule_name: Some(self.name().to_string()),
511 message,
512 line: start_line,
513 column: start_col,
514 end_line,
515 end_column: end_col,
516 severity: Severity::Warning,
517 fix: None, // No auto-fix for long single sentences
518 });
519 continue;
520 }
521 // Multiple sentences will be handled by paragraph-based reflow
522 continue;
523 }
524
525 // In semantic-line-breaks mode, skip per-line checks —
526 // all reflow is handled at the paragraph level with cascading splits
527 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
528 continue;
529 }
530
531 // Don't provide fix for individual lines when reflow is enabled
532 // Paragraph-based fixes will be handled separately
533 let fix = None;
534
535 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
536
537 // Calculate precise character range for the excess portion
538 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
539
540 warnings.push(LintWarning {
541 rule_name: Some(self.name().to_string()),
542 message,
543 line: start_line,
544 column: start_col,
545 end_line,
546 end_column: end_col,
547 severity: Severity::Warning,
548 fix,
549 });
550 }
551
552 // If reflow is enabled, generate paragraph-based fixes
553 if effective_config.reflow {
554 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
555 // Merge paragraph warnings with line warnings, removing duplicates
556 for pw in paragraph_warnings {
557 // Remove any line warnings that overlap with this paragraph
558 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
559 warnings.push(pw);
560 }
561 }
562
563 Ok(warnings)
564 }
565
566 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
567 // For CLI usage, apply fixes from warnings
568 // LSP will use the warning-based fixes directly
569 let warnings = self.check(ctx)?;
570 let warnings =
571 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
572
573 // If there are no fixes, return content unchanged
574 if !warnings.iter().any(|w| w.fix.is_some()) {
575 return Ok(ctx.content.to_string());
576 }
577
578 // Apply warning-based fixes
579 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
580 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
581 }
582
583 fn as_any(&self) -> &dyn std::any::Any {
584 self
585 }
586
587 fn category(&self) -> RuleCategory {
588 RuleCategory::Whitespace
589 }
590
591 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
592 self.should_skip_with_config(ctx, &self.config)
593 }
594
595 fn default_config_section(&self) -> Option<(String, toml::Value)> {
596 let table = crate::rule_config_serde::config_schema_table(&MD013Config::default())?;
597 if table.is_empty() {
598 None
599 } else {
600 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
601 }
602 }
603
604 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
605 let mut aliases = std::collections::HashMap::new();
606 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
607 aliases.insert("strict_sentences".to_string(), "require-sentence-capital".to_string());
608 aliases.insert("strict-sentences".to_string(), "require-sentence-capital".to_string());
609 Some(aliases)
610 }
611
612 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
613 where
614 Self: Sized,
615 {
616 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
617 // Use global line_length if rule-specific config still has default value
618 if rule_config.line_length.get() == 80 {
619 rule_config.line_length = config.global.line_length;
620 }
621 Box::new(Self::from_config_struct(rule_config))
622 }
623}
624
625impl MD013LineLength {
626 fn is_blockquote_content_boundary(
627 &self,
628 content: &str,
629 line_num: usize,
630 ctx: &crate::lint_context::LintContext,
631 ) -> bool {
632 let trimmed = content.trim();
633
634 trimmed.is_empty()
635 || ctx.line_info(line_num).is_some_and(|info| {
636 info.in_code_block
637 || info.in_front_matter
638 || info.in_html_block
639 || info.in_html_comment
640 || info.in_esm_block
641 || info.in_jsx_expression
642 || info.in_jsx_block
643 || info.in_mdx_comment
644 || info.in_mkdocstrings
645 || info.in_pymdown_block
646 || info.in_mkdocs_container()
647 || info.is_div_marker
648 })
649 || trimmed.starts_with('#')
650 || trimmed.starts_with("```")
651 || trimmed.starts_with("~~~")
652 || trimmed.starts_with('>')
653 || TableUtils::is_potential_table_row(content)
654 || is_list_item(trimmed)
655 || is_horizontal_rule(content)
656 || (trimmed.starts_with('[') && content.contains("]:"))
657 || is_template_directive_only(content)
658 || is_standalone_attr_list(content)
659 || is_snippet_block_delimiter(content)
660 || is_github_alert_marker(trimmed)
661 || is_html_only_line(content)
662 }
663
664 fn generate_blockquote_paragraph_fix(
665 &self,
666 ctx: &crate::lint_context::LintContext,
667 config: &MD013Config,
668 lines: &[&str],
669 line_index: &LineIndex,
670 start_idx: usize,
671 line_ending: &str,
672 ) -> (Option<LintWarning>, usize) {
673 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
674 return (None, start_idx + 1);
675 };
676 let target_level = start_bq.nesting_level;
677
678 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
679 let mut i = start_idx;
680
681 while i < lines.len() {
682 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
683 break;
684 }
685
686 let line_num = i + 1;
687 if line_num > ctx.lines.len() {
688 break;
689 }
690
691 if lines[i].trim().is_empty() {
692 break;
693 }
694
695 let line_bq = ctx.lines[i].blockquote.as_deref();
696 if let Some(bq) = line_bq {
697 if bq.nesting_level != target_level {
698 break;
699 }
700
701 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
702 break;
703 }
704
705 collected.push(CollectedBlockquoteLine {
706 line_idx: i,
707 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
708 });
709 i += 1;
710 continue;
711 }
712
713 let lazy_content = lines[i].trim_start();
714 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
715 break;
716 }
717
718 collected.push(CollectedBlockquoteLine {
719 line_idx: i,
720 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
721 });
722 i += 1;
723 }
724
725 if collected.is_empty() {
726 return (None, start_idx + 1);
727 }
728
729 let next_idx = i;
730 let paragraph_start = collected[0].line_idx;
731 let end_line = collected[collected.len() - 1].line_idx;
732 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
733 let paragraph_text = line_data
734 .iter()
735 .map(|d| d.content.as_str())
736 .collect::<Vec<_>>()
737 .join(" ");
738
739 let contains_definition_list = line_data
740 .iter()
741 .any(|d| crate::utils::is_definition_list_item(&d.content));
742 if contains_definition_list {
743 return (None, next_idx);
744 }
745
746 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
747 if contains_snippets {
748 return (None, next_idx);
749 }
750
751 let needs_reflow = match config.reflow_mode {
752 ReflowMode::Normalize => {
753 self.normalize_mode_needs_reflow(line_data.iter().map(|d| d.content.as_str()), config)
754 }
755 ReflowMode::SentencePerLine => {
756 let sentences = split_into_sentences(¶graph_text);
757 sentences.len() > 1 || line_data.len() > 1
758 }
759 ReflowMode::SemanticLineBreaks => {
760 let sentences = split_into_sentences(¶graph_text);
761 sentences.len() > 1
762 || line_data.len() > 1
763 || collected
764 .iter()
765 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
766 }
767 ReflowMode::Default => collected
768 .iter()
769 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
770 };
771
772 if !needs_reflow {
773 return (None, next_idx);
774 }
775
776 let fallback_prefix = start_bq.prefix.clone();
777 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
778 let continuation_style = blockquote_continuation_style(&line_data);
779
780 let reflow_line_length = if config.line_length.is_unlimited() {
781 usize::MAX
782 } else {
783 config
784 .line_length
785 .get()
786 .saturating_sub(self.calculate_string_length(&explicit_prefix))
787 .max(1)
788 };
789
790 let reflow_options = crate::utils::text_reflow::ReflowOptions {
791 line_length: reflow_line_length,
792 break_on_sentences: true,
793 preserve_breaks: false,
794 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
795 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
796 abbreviations: config.abbreviations_for_reflow(),
797 length_mode: self.reflow_length_mode(),
798 attr_lists: ctx.flavor.supports_attr_lists(),
799 require_sentence_capital: config.require_sentence_capital,
800 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
801 Some(4)
802 } else {
803 None
804 },
805 };
806
807 let reflowed_with_style =
808 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
809
810 if reflowed_with_style.is_empty() {
811 return (None, next_idx);
812 }
813
814 let reflowed_text = reflowed_with_style.join(line_ending);
815
816 let start_range = line_index.whole_line_range(paragraph_start + 1);
817 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
818 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
819 } else {
820 line_index.whole_line_range(end_line + 1)
821 };
822 let byte_range = start_range.start..end_range.end;
823
824 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
825 format!("{reflowed_text}{line_ending}")
826 } else {
827 reflowed_text
828 };
829
830 let original_text = &ctx.content[byte_range.clone()];
831 if original_text == replacement {
832 return (None, next_idx);
833 }
834
835 let (warning_line, warning_end_line) = match config.reflow_mode {
836 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
837 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
838 ReflowMode::Default => {
839 let violating_line = collected
840 .iter()
841 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
842 .map_or(paragraph_start + 1, |line| line.line_idx + 1);
843 (violating_line, violating_line)
844 }
845 };
846
847 let warning = LintWarning {
848 rule_name: Some(self.name().to_string()),
849 message: match config.reflow_mode {
850 ReflowMode::Normalize => format!(
851 "Paragraph could be normalized to use line length of {} characters",
852 config.line_length.get()
853 ),
854 ReflowMode::SentencePerLine => {
855 let num_sentences = split_into_sentences(¶graph_text).len();
856 if line_data.len() == 1 {
857 format!("Line contains {num_sentences} sentences (one sentence per line required)")
858 } else {
859 let num_lines = line_data.len();
860 format!(
861 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
862 )
863 }
864 }
865 ReflowMode::SemanticLineBreaks => {
866 let num_sentences = split_into_sentences(¶graph_text).len();
867 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
868 }
869 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
870 },
871 line: warning_line,
872 column: 1,
873 end_line: warning_end_line,
874 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
875 severity: Severity::Warning,
876 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
877 };
878
879 (Some(warning), next_idx)
880 }
881
882 /// Generate paragraph-based fixes
883 fn generate_paragraph_fixes(
884 &self,
885 ctx: &crate::lint_context::LintContext,
886 config: &MD013Config,
887 lines: &[&str],
888 ) -> Vec<LintWarning> {
889 let mut warnings = Vec::new();
890 let line_index = LineIndex::new(ctx.content);
891
892 // Detect the content's line ending style to preserve it in replacements.
893 // The LSP receives content from editors which may use CRLF (Windows).
894 // Replacements must match the original line endings to avoid false positives.
895 let line_ending = crate::utils::line_ending::detect_line_ending(ctx.content);
896
897 let mut i = 0;
898 while i < lines.len() {
899 let line_num = i + 1;
900
901 // Handle blockquote paragraphs with style-preserving reflow.
902 // Skip blockquotes when blockquotes=false or paragraphs=false
903 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
904 if !config.blockquotes || !config.paragraphs {
905 // Skip past all blockquote lines (explicit and lazy continuations).
906 // A lazy continuation is a non-blank line without `>` that follows
907 // a blockquote line and isn't a structural element.
908 let mut saw_explicit_bq = false;
909 while i < lines.len() && i < ctx.lines.len() {
910 if ctx.lines[i].blockquote.is_some() {
911 saw_explicit_bq = true;
912 i += 1;
913 } else if saw_explicit_bq
914 && !lines[i].trim().is_empty()
915 && !lines[i].trim_start().starts_with('#')
916 && !lines[i].trim_start().starts_with('>')
917 {
918 // Lazy continuation of preceding blockquote
919 i += 1;
920 } else {
921 break;
922 }
923 }
924 continue;
925 }
926 let (warning, next_idx) =
927 self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i, line_ending);
928 if let Some(warning) = warning {
929 warnings.push(warning);
930 }
931 i = next_idx;
932 continue;
933 }
934
935 // Skip special structures (but NOT MkDocs containers - those get special handling)
936 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
937 info.in_code_block
938 || info.in_front_matter
939 || info.in_html_block
940 || info.in_html_comment
941 || info.in_esm_block
942 || info.in_jsx_expression
943 || info.in_jsx_block
944 || info.in_mdx_comment
945 || info.in_mkdocstrings
946 || info.in_pymdown_block
947 });
948
949 // Skip link reference definitions but NOT footnote definitions.
950 // Footnote definitions (`[^id]: prose`) contain reflowable text,
951 // while link reference definitions (`[ref]: URL`) contain URLs
952 // that cannot be shortened.
953 let is_link_ref_def =
954 lines[i].trim().starts_with('[') && !lines[i].trim().starts_with("[^") && lines[i].contains("]:");
955
956 if should_skip_due_to_line_info
957 || lines[i].trim().starts_with('#')
958 || TableUtils::is_potential_table_row(lines[i])
959 || lines[i].trim().is_empty()
960 || is_horizontal_rule(lines[i])
961 || is_template_directive_only(lines[i])
962 || is_link_ref_def
963 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
964 || is_html_only_line(lines[i])
965 {
966 i += 1;
967 continue;
968 }
969
970 // Handle footnote definitions: `[^id]: prose text that can be reflowed`
971 // Supports multi-paragraph footnotes with code blocks, blockquotes,
972 // tables, and lists preserved verbatim.
973 // Validate structure: must start with `[^`, contain `]:`, and the ID
974 // must not contain `[` or `]` (prevents false matches on nested brackets)
975 if lines[i].trim().starts_with("[^") && lines[i].contains("]:") && {
976 let after_caret = &lines[i].trim()[2..];
977 after_caret
978 .find("]:")
979 .is_some_and(|pos| pos > 0 && !after_caret[..pos].contains(['[', ']']))
980 } {
981 let footnote_start = i;
982 let line = lines[i];
983
984 // Extract the prefix `[^id]:`
985 let Some(colon_pos) = line.find("]:") else {
986 i += 1;
987 continue;
988 };
989 let prefix_end = colon_pos + 2;
990 let prefix = &line[..prefix_end];
991
992 // Content starts after `]: ` (with optional space)
993 let content_start = if line[prefix_end..].starts_with(' ') {
994 prefix_end + 1
995 } else {
996 prefix_end
997 };
998 let first_content = &line[content_start..];
999
// Footnote definitions (a Markdown extension, not part of core CommonMark)
// use a 4-space continuation indent
1001 const FN_INDENT: usize = 4;
1002
1003 // --- Line classification for footnote content ---
/// How a single collected line of a footnote definition is treated when the
/// footnote is rebuilt.
#[derive(Clone, Debug)]
enum FnLineType {
    /// Prose text that may be joined with neighbouring prose and reflowed.
    Content(String),
    /// Text that is kept as-is, paired with the visual indent measured on
    /// the original line.
    Verbatim(String, usize),
    /// Blank line separating blocks inside the footnote.
    Empty,
}
1010
/// Width, in columns, of the leading whitespace of `s`.
///
/// A tab advances to the next multiple of four columns (tab stop of 4)
/// rather than always adding four, so mixed space/tab indentation such as
/// `"  \t"` measures 4 columns, not 6. Every other whitespace character
/// counts as one column. Measuring stops at the first non-whitespace
/// character.
fn visual_indent(s: &str) -> usize {
    const TAB_STOP: usize = 4;
    s.chars()
        .take_while(|c| c.is_whitespace())
        .fold(0usize, |col, c| {
            if c == '\t' {
                // Jump to the next tab stop instead of adding a flat width.
                (col / TAB_STOP + 1) * TAB_STOP
            } else {
                col + 1
            }
        })
}
1018
/// Reports whether `s` (surrounding whitespace ignored) is a fenced code
/// block delimiter.
///
/// A delimiter starts with a run of at least three identical backticks or
/// tildes. For backtick fences the text after the run must not contain
/// another backtick: a line such as "```code``` text" is an inline code
/// span, not a fence, and must not switch the footnote into verbatim mode.
/// Tilde fences may be followed by any info string.
fn is_fence(s: &str) -> bool {
    let t = s.trim();
    let Some(fence_char) = t.chars().next().filter(|c| matches!(c, '`' | '~')) else {
        return false;
    };
    let run_len = t.chars().take_while(|&c| c == fence_char).count();
    if run_len < 3 {
        return false;
    }
    // Both fence characters are single-byte ASCII, so the character count of
    // the run is also a valid byte offset into `t`.
    fence_char == '~' || !t[run_len..].contains('`')
}
1026
/// Reports whether `s` (surrounding whitespace ignored) is a setext heading
/// underline: a non-empty run made up solely of `=` or solely of `-`.
///
/// Internal spaces are not allowed, so `"= ="` is ordinary text rather than
/// an underline.
fn is_setext_underline(s: &str) -> bool {
    let t = s.trim();
    match t.chars().next() {
        // The first character fixes the marker; every other one must match it.
        Some(marker @ ('=' | '-')) => t.chars().all(|c| c == marker),
        _ => false,
    }
}
1034
1035 // Deferred body: `[^id]:\n content` — first line has no content,
1036 // actual content starts on the next indented line
1037 let deferred_body = first_content.trim().is_empty();
1038
1039 // Collect all lines belonging to this footnote definition
1040 let mut fn_lines: Vec<FnLineType> = Vec::new();
1041 if !deferred_body {
1042 fn_lines.push(FnLineType::Content(first_content.to_string()));
1043 }
1044 let mut last_consumed = i;
1045 i += 1;
1046
1047 // Strip only the footnote continuation indent, preserving
1048 // internal indentation (e.g., code block body indent)
1049 let strip_fn_indent = |s: &str| -> String {
1050 let mut chars = s.chars();
1051 let mut stripped = 0;
1052 while stripped < FN_INDENT {
1053 match chars.next() {
1054 Some('\t') => stripped += 4,
1055 Some(c) if c.is_whitespace() => stripped += 1,
1056 _ => break,
1057 }
1058 }
1059 chars.as_str().to_string()
1060 };
1061
1062 let mut in_fenced_code = false;
1063 let mut consecutive_blanks = 0u32;
1064
1065 while i < lines.len() {
1066 let next = lines[i];
1067 let next_trimmed = next.trim();
1068
1069 // Blank line handling
1070 if next_trimmed.is_empty() {
1071 consecutive_blanks += 1;
1072 // 2+ consecutive blanks terminate the footnote
1073 if consecutive_blanks >= 2 {
1074 break;
1075 }
1076
1077 // Inside a fenced code block, blank lines are part of the code
1078 if in_fenced_code {
1079 consecutive_blanks = 0; // Don't count blanks inside code blocks
1080 fn_lines.push(FnLineType::Verbatim(String::new(), 0));
1081 last_consumed = i;
1082 i += 1;
1083 continue;
1084 }
1085
1086 // Peek ahead: if next non-blank line is indented >= FN_INDENT,
1087 // this blank is an internal paragraph separator
1088 if i + 1 < lines.len() {
1089 let peek = lines[i + 1];
1090 let peek_indent = visual_indent(peek);
1091 if !peek.trim().is_empty() && peek_indent >= FN_INDENT {
1092 fn_lines.push(FnLineType::Empty);
1093 last_consumed = i;
1094 i += 1;
1095 continue;
1096 }
1097 }
1098 // No valid continuation after blank — end of footnote
1099 break;
1100 }
1101
1102 consecutive_blanks = 0;
1103 let indent = visual_indent(next);
1104
1105 // Not indented enough — end of footnote
1106 if indent < FN_INDENT {
1107 break;
1108 }
1109
1110 // Inside a fenced code block: everything is verbatim until closing fence
1111 if in_fenced_code {
1112 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1113 if is_fence(next_trimmed) {
1114 in_fenced_code = false;
1115 }
1116 last_consumed = i;
1117 i += 1;
1118 continue;
1119 }
1120
1121 // Fence opener — start verbatim code block
1122 if is_fence(next_trimmed) {
1123 in_fenced_code = true;
1124 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1125 last_consumed = i;
1126 i += 1;
1127 continue;
1128 }
1129
1130 // Indented code block: indent >= FN_INDENT + 4 (= 8 spaces)
1131 if indent >= FN_INDENT + 4 {
1132 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1133 last_consumed = i;
1134 i += 1;
1135 continue;
1136 }
1137
1138 // Structural content that must be preserved verbatim
1139 if next_trimmed.starts_with('#')
1140 || is_list_item(next_trimmed)
1141 || next_trimmed.starts_with('>')
1142 || TableUtils::is_potential_table_row(next_trimmed)
1143 || is_setext_underline(next_trimmed)
1144 || is_horizontal_rule(next_trimmed)
1145 || crate::utils::mkdocs_footnotes::is_footnote_definition(next_trimmed)
1146 {
1147 // Preserve verbatim: blockquotes, tables, lists, setext
1148 // underlines, and horizontal rules inside the footnote
1149 if next_trimmed.starts_with('>')
1150 || TableUtils::is_potential_table_row(next_trimmed)
1151 || is_list_item(next_trimmed)
1152 || is_setext_underline(next_trimmed)
1153 || is_horizontal_rule(next_trimmed)
1154 {
1155 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1156 last_consumed = i;
1157 i += 1;
1158 continue;
1159 }
// Headings and new footnote definitions end the footnote
1161 break;
1162 }
1163
1164 // Link reference definitions inside footnotes are not reflowable
1165 if next_trimmed.starts_with('[')
1166 && !next_trimmed.starts_with("[^")
1167 && next_trimmed.contains("]:")
1168 && LINK_REF_PATTERN.is_match(next_trimmed)
1169 {
1170 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1171 last_consumed = i;
1172 i += 1;
1173 continue;
1174 }
1175
1176 // HTML-only lines inside footnotes are not reflowable
1177 if is_html_only_line(next_trimmed) {
1178 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1179 last_consumed = i;
1180 i += 1;
1181 continue;
1182 }
1183
1184 // Regular prose content
1185 fn_lines.push(FnLineType::Content(next_trimmed.to_string()));
1186 last_consumed = i;
1187 i += 1;
1188 }
1189
1190 // Nothing collected or only empty lines
1191 if fn_lines.iter().all(|l| matches!(l, FnLineType::Empty)) || fn_lines.is_empty() {
1192 continue;
1193 }
1194
1195 // --- Group into blocks ---
/// A run of consecutive footnote lines of the same kind.
#[derive(Debug)]
enum FnBlock {
    /// Prose lines that belong to one paragraph.
    Paragraph(Vec<String>),
    /// Lines kept as-is, each stored as `(content, indent)`.
    Verbatim(Vec<(String, usize)>),
}
1201
1202 let mut blocks: Vec<FnBlock> = Vec::new();
1203 let mut current_para: Vec<String> = Vec::new();
1204 let mut current_verbatim: Vec<(String, usize)> = Vec::new();
1205
1206 for fl in &fn_lines {
1207 match fl {
1208 FnLineType::Content(s) => {
1209 if !current_verbatim.is_empty() {
1210 blocks.push(FnBlock::Verbatim(std::mem::take(&mut current_verbatim)));
1211 }
1212 current_para.push(s.clone());
1213 }
1214 FnLineType::Verbatim(s, indent) => {
1215 if !current_para.is_empty() {
1216 blocks.push(FnBlock::Paragraph(std::mem::take(&mut current_para)));
1217 }
1218 current_verbatim.push((s.clone(), *indent));
1219 }
1220 FnLineType::Empty => {
1221 if !current_para.is_empty() {
1222 blocks.push(FnBlock::Paragraph(std::mem::take(&mut current_para)));
1223 }
1224 if !current_verbatim.is_empty() {
1225 blocks.push(FnBlock::Verbatim(std::mem::take(&mut current_verbatim)));
1226 }
1227 }
1228 }
1229 }
1230 if !current_para.is_empty() {
1231 blocks.push(FnBlock::Paragraph(current_para));
1232 }
1233 if !current_verbatim.is_empty() {
1234 blocks.push(FnBlock::Verbatim(current_verbatim));
1235 }
1236
1237 // --- Reflow paragraphs and reconstruct ---
1238 let prefix_display_width = prefix.chars().count() + 1; // +1 for space
1239 let reflow_line_length = if config.line_length.is_unlimited() {
1240 usize::MAX
1241 } else {
1242 config
1243 .line_length
1244 .get()
1245 .saturating_sub(FN_INDENT.max(prefix_display_width))
1246 .max(20)
1247 };
1248 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1249 line_length: reflow_line_length,
1250 break_on_sentences: true,
1251 preserve_breaks: false,
1252 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1253 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1254 abbreviations: config.abbreviations_for_reflow(),
1255 length_mode: self.reflow_length_mode(),
1256 attr_lists: ctx.flavor.supports_attr_lists(),
1257 require_sentence_capital: config.require_sentence_capital,
1258 max_list_continuation_indent: None,
1259 };
1260
1261 let indent_str = " ".repeat(FN_INDENT);
1262 let mut result_lines: Vec<String> = Vec::new();
1263 let mut is_first_block = true;
1264
1265 for block in &blocks {
1266 match block {
1267 FnBlock::Paragraph(para_lines) => {
1268 let paragraph_text = para_lines.join(" ");
1269 let paragraph_text = paragraph_text.trim();
1270 if paragraph_text.is_empty() {
1271 continue;
1272 }
1273
1274 let reflowed = crate::utils::text_reflow::reflow_line(paragraph_text, &reflow_options);
1275 if reflowed.is_empty() {
1276 continue;
1277 }
1278
1279 // Blank line separator between blocks
1280 if !result_lines.is_empty() {
1281 result_lines.push(String::new());
1282 }
1283
1284 for (idx, rline) in reflowed.iter().enumerate() {
1285 if is_first_block && idx == 0 {
1286 result_lines.push(format!("{prefix} {rline}"));
1287 } else {
1288 result_lines.push(format!("{indent_str}{rline}"));
1289 }
1290 }
1291 is_first_block = false;
1292 }
1293 FnBlock::Verbatim(verb_lines) => {
1294 // Blank line separator between blocks
1295 if !result_lines.is_empty() {
1296 result_lines.push(String::new());
1297 }
1298
1299 if is_first_block {
1300 // Verbatim as first block in a deferred-body footnote
1301 if deferred_body {
1302 result_lines.push(prefix.to_string());
1303 }
1304 is_first_block = false;
1305 }
1306 for (content, _orig_indent) in verb_lines {
1307 result_lines.push(format!("{indent_str}{content}"));
1308 }
1309 }
1310 }
1311 }
1312
1313 // If nothing was produced, skip
1314 if result_lines.is_empty() {
1315 continue;
1316 }
1317
1318 let reflowed_text = result_lines.join(line_ending);
1319
1320 // Calculate byte range using last_consumed
1321 let start_range = line_index.whole_line_range(footnote_start + 1);
1322 let end_range = if last_consumed == lines.len() - 1 && !ctx.content.ends_with('\n') {
1323 line_index.line_text_range(last_consumed + 1, 1, lines[last_consumed].len() + 1)
1324 } else {
1325 line_index.whole_line_range(last_consumed + 1)
1326 };
1327 let byte_range = start_range.start..end_range.end;
1328
1329 let replacement = if last_consumed < lines.len() - 1 || ctx.content.ends_with('\n') {
1330 format!("{reflowed_text}{line_ending}")
1331 } else {
1332 reflowed_text
1333 };
1334
1335 let original_text = &ctx.content[byte_range.clone()];
1336 let max_length = (footnote_start..=last_consumed)
1337 .map(|idx| self.calculate_effective_length(lines[idx]))
1338 .max()
1339 .unwrap_or(0);
1340 let line_limit = if config.line_length.is_unlimited() {
1341 usize::MAX
1342 } else {
1343 config.line_length.get()
1344 };
1345 if original_text != replacement && max_length > line_limit {
1346 warnings.push(LintWarning {
1347 rule_name: Some(self.name().to_string()),
1348 message: format!(
1349 "Line length {} exceeds {} characters",
1350 max_length,
1351 config.line_length.get()
1352 ),
1353 line: footnote_start + 1,
1354 column: 1,
1355 end_line: last_consumed + 1,
1356 end_column: lines[last_consumed].len() + 1,
1357 severity: Severity::Warning,
1358 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
1359 });
1360 }
1361 continue;
1362 }
1363
1364 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
1365 if ctx
1366 .line_info(line_num)
1367 .is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container)
1368 {
1369 // Skip admonition/tab marker lines — only reflow their indented content
1370 let current_line = lines[i];
1371 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
1372 i += 1;
1373 continue;
1374 }
1375
1376 let container_start = i;
1377
1378 // Detect the actual indent level from the first content line
1379 // (supports nested admonitions with 8+ spaces)
1380 let first_line = lines[i];
1381 let base_indent_len = first_line.len() - first_line.trim_start().len();
1382 let base_indent: String = " ".repeat(base_indent_len);
1383
1384 // Collect consecutive MkDocs container paragraph lines
1385 let mut container_lines: Vec<&str> = Vec::new();
1386 while i < lines.len() {
1387 let current_line_num = i + 1;
1388 let line_info = ctx.line_info(current_line_num);
1389
1390 // Stop if we leave the MkDocs container
1391 if !line_info.is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container) {
1392 break;
1393 }
1394
1395 let line = lines[i];
1396
1397 // Stop at paragraph boundaries within the container
1398 if line.trim().is_empty() {
1399 break;
1400 }
1401
1402 // Skip list items, code blocks, headings, HTML-only lines within containers
1403 if is_list_item(line.trim())
1404 || line.trim().starts_with("```")
1405 || line.trim().starts_with("~~~")
1406 || line.trim().starts_with('#')
1407 || is_html_only_line(line)
1408 {
1409 break;
1410 }
1411
1412 container_lines.push(line);
1413 i += 1;
1414 }
1415
1416 if container_lines.is_empty() {
1417 // Must advance i to avoid infinite loop when we encounter
1418 // non-paragraph content (code block, list, heading, empty line)
1419 // at the start of an MkDocs container
1420 i += 1;
1421 continue;
1422 }
1423
1424 // Strip the base indent from each line and join for reflow
1425 let stripped_lines: Vec<&str> = container_lines
1426 .iter()
1427 .map(|line| {
1428 if line.starts_with(&base_indent) {
1429 &line[base_indent_len..]
1430 } else {
1431 line.trim_start()
1432 }
1433 })
1434 .collect();
1435 let paragraph_text = stripped_lines.join(" ");
1436
1437 // Check if reflow is needed
1438 let needs_reflow = match config.reflow_mode {
1439 ReflowMode::Normalize => self.normalize_mode_needs_reflow(container_lines.iter().copied(), config),
1440 ReflowMode::SentencePerLine => {
1441 let sentences = split_into_sentences(¶graph_text);
1442 sentences.len() > 1 || container_lines.len() > 1
1443 }
1444 ReflowMode::SemanticLineBreaks => {
1445 let sentences = split_into_sentences(¶graph_text);
1446 sentences.len() > 1
1447 || container_lines.len() > 1
1448 || container_lines
1449 .iter()
1450 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1451 }
1452 ReflowMode::Default => container_lines
1453 .iter()
1454 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
1455 };
1456
1457 if !needs_reflow {
1458 continue;
1459 }
1460
1461 // Calculate byte range for this container paragraph
1462 let start_range = line_index.whole_line_range(container_start + 1);
1463 let end_line = container_start + container_lines.len() - 1;
1464 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1465 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1466 } else {
1467 line_index.whole_line_range(end_line + 1)
1468 };
1469 let byte_range = start_range.start..end_range.end;
1470
// Reflow with adjusted line length (accounting for the container's detected base indent)
1472 let reflow_line_length = if config.line_length.is_unlimited() {
1473 usize::MAX
1474 } else {
1475 config.line_length.get().saturating_sub(base_indent_len).max(1)
1476 };
1477 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1478 line_length: reflow_line_length,
1479 break_on_sentences: true,
1480 preserve_breaks: false,
1481 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1482 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1483 abbreviations: config.abbreviations_for_reflow(),
1484 length_mode: self.reflow_length_mode(),
1485 attr_lists: ctx.flavor.supports_attr_lists(),
1486 require_sentence_capital: config.require_sentence_capital,
1487 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
1488 Some(4)
1489 } else {
1490 None
1491 },
1492 };
1493 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1494
// Re-add the detected base indent to each reflowed line
1496 let reflowed_with_indent: Vec<String> =
1497 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
1498 let reflowed_text = reflowed_with_indent.join(line_ending);
1499
1500 // Preserve trailing newline
1501 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1502 format!("{reflowed_text}{line_ending}")
1503 } else {
1504 reflowed_text
1505 };
1506
1507 // Only generate a warning if the replacement is different
1508 let original_text = &ctx.content[byte_range.clone()];
1509 if original_text != replacement {
1510 warnings.push(LintWarning {
1511 rule_name: Some(self.name().to_string()),
1512 message: format!(
1513 "Line length {} exceeds {} characters (in MkDocs container)",
1514 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
1515 config.line_length.get()
1516 ),
1517 line: container_start + 1,
1518 column: 1,
1519 end_line: end_line + 1,
1520 end_column: lines[end_line].len() + 1,
1521 severity: Severity::Warning,
1522 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
1523 });
1524 }
1525 continue;
1526 }
1527
/// Reports whether `content`, after skipping leading whitespace, begins
/// with one of the upper-case labels (`NOTE:`, `WARNING:`, ...) listed in
/// `SEMANTIC_MARKERS`. The match is case-sensitive and prefix-only.
fn is_semantic_line(content: &str) -> bool {
    const SEMANTIC_MARKERS: [&str; 8] = [
        "NOTE:",
        "WARNING:",
        "IMPORTANT:",
        "CAUTION:",
        "TIP:",
        "DANGER:",
        "HINT:",
        "INFO:",
    ];

    let body = content.trim_start();
    for marker in SEMANTIC_MARKERS {
        if body.starts_with(marker) {
            return true;
        }
    }
    false
}
1543
/// Reports whether `content`, after skipping leading whitespace, is an
/// opening or closing code fence.
///
/// Any line starting with `~~~` qualifies. A line starting with three or
/// more backticks qualifies only when no further backtick follows the
/// opening run: "```js``` text" is an inline code span inside prose and
/// must stay reflowable paragraph content.
fn is_fence_marker(content: &str) -> bool {
    let trimmed = content.trim_start();
    if trimmed.starts_with("~~~") {
        return true;
    }
    match trimmed.strip_prefix("```") {
        // Skip the remainder of the opening run, then look for a stray backtick.
        Some(rest) => !rest.trim_start_matches('`').contains('`'),
        None => false,
    }
}
1549
1550 // Check if this is a list item - handle it specially
1551 let trimmed = lines[i].trim();
1552 if is_list_item(trimmed) {
1553 // Collect the entire list item including continuation lines
1554 let list_start = i;
1555 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
1556 let marker_len = marker.len();
1557
1558 // Checkbox ([ ]/[x]/[X]) is inline content, not part of the list marker.
1559 // Use the base bullet/number marker width for continuation recognition
1560 // so that continuation lines at 2+ spaces are collected for "- [ ] " items.
1561 let base_marker_len = if marker.contains("[ ] ") || marker.contains("[x] ") || marker.contains("[X] ") {
1562 marker.find('[').unwrap_or(marker_len)
1563 } else {
1564 marker_len
1565 };
1566
1567 // MkDocs flavor requires at least 4 spaces for list continuation
1568 // after a blank line (multi-paragraph list items). For non-blank
1569 // continuation (lines directly following the marker line), use
1570 // the natural marker width so that 2-space indent is recognized.
1571 let item_indent = ctx.lines[i].indent;
1572 let min_continuation_indent = if ctx.flavor.requires_strict_list_indent() {
1573 // Use 4-space relative indent from the list item's nesting level
1574 item_indent + (base_marker_len - item_indent).max(4)
1575 } else {
1576 marker_len
1577 };
1578 let content_continuation_indent = base_marker_len;
1579
1580 // Track lines and their types (content, code block, fence, nested list)
#[derive(Clone)]
enum LineType {
    /// Paragraph text of the list item.
    Content(String),
    /// Code line (fenced or indented): content and original indent.
    CodeBlock(String, usize),
    /// Line starting with NOTE:, WARNING:, etc. that should stay separate.
    SemanticLine(String),
    /// MkDocs Snippets delimiter (-8<-) that must stay on its own line.
    SnippetLine(String),
    /// Quarto/Pandoc div marker (::: opening or closing).
    DivMarker(String),
    /// Admonition header text (e.g. "!!! note") and original indent.
    AdmonitionHeader(String, usize),
    /// Admonition body content text and original indent.
    AdmonitionContent(String, usize),
    /// GFM table row, preserved verbatim with its original indent.
    Table(String, usize),
    /// Blank line inside the list item.
    Empty,
}
1593
1594 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1595 i += 1;
1596
1597 // Collect continuation lines using ctx.lines for metadata
1598 while i < lines.len() {
1599 let line_info = &ctx.lines[i];
1600
1601 // Use pre-computed is_blank from ctx
1602 if line_info.is_blank {
1603 // Empty line - check if next line is indented (part of list item)
1604 if i + 1 < lines.len() {
1605 let next_info = &ctx.lines[i + 1];
1606
1607 // Check if next line is indented enough to be continuation
1608 if !next_info.is_blank && next_info.indent >= min_continuation_indent {
1609 // This blank line is between paragraphs/blocks in the list item
1610 list_item_lines.push(LineType::Empty);
1611 i += 1;
1612 continue;
1613 }
1614 }
1615 // No indented line after blank, end of list item
1616 break;
1617 }
1618
1619 // Use pre-computed indent from ctx
1620 let indent = line_info.indent;
1621
1622 // Valid continuation must be indented at least content_continuation_indent.
1623 // For non-blank continuation, use marker_len (e.g. 2 for "- ").
1624 // MkDocs strict 4-space requirement applies only after blank lines.
1625 if indent >= content_continuation_indent {
1626 let trimmed = line_info.content(ctx.content).trim();
1627
1628 // Check for MkDocs admonition lines inside list items BEFORE
1629 // checking in_code_block. Lines inside code blocks within
1630 // admonitions have both in_admonition and in_code_block set;
1631 // admonition membership takes priority so the entire admonition
1632 // structure (including embedded code blocks) is preserved.
1633 if line_info.in_admonition {
1634 let raw_content = line_info.content(ctx.content);
1635 if mkdocs_admonitions::is_admonition_start(raw_content) {
1636 let header_text = raw_content[indent..].trim_end().to_string();
1637 list_item_lines.push(LineType::AdmonitionHeader(header_text, indent));
1638 } else {
1639 let body_text = raw_content[indent..].trim_end().to_string();
1640 list_item_lines.push(LineType::AdmonitionContent(body_text, indent));
1641 }
1642 i += 1;
1643 continue;
1644 }
1645
1646 // Use pre-computed in_code_block from ctx
1647 if line_info.in_code_block {
1648 list_item_lines.push(LineType::CodeBlock(
1649 line_info.content(ctx.content)[indent..].to_string(),
1650 indent,
1651 ));
1652 i += 1;
1653 continue;
1654 }
1655
1656 // Check if this is a SIBLING list item (breaks parent)
1657 // Nested lists are indented >= marker_len and are PART of the parent item
1658 // Siblings are at indent < marker_len (at or before parent marker)
1659 if is_list_item(trimmed) && indent < marker_len {
1660 // This is a sibling item at same or higher level - end parent item
1661 break;
1662 }
1663
1664 // Nested list items are always processed independently
1665 // by the outer loop, so break when we encounter one.
1666 // If a blank line was collected before this, uncollect it
1667 // so the outer loop preserves the blank between parent and nested.
1668 if is_list_item(trimmed) && indent >= marker_len {
1669 if matches!(list_item_lines.last(), Some(LineType::Empty)) {
1670 list_item_lines.pop();
1671 i -= 1;
1672 }
1673 break;
1674 }
1675
1676 // Normal continuation vs indented code block.
1677 // Use min_continuation_indent for the threshold since
1678 // code blocks start 4 spaces beyond the expected content
1679 // level (which is min_continuation_indent for MkDocs).
1680 if indent <= min_continuation_indent + 3 {
1681 // Extract content (remove indentation and trailing whitespace)
1682 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1683 // See: https://github.com/rvben/rumdl/issues/76
1684 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1685
1686 // Check if this is a div marker (::: opening or closing)
1687 // These must be preserved on their own line, not merged into paragraphs
1688 if line_info.is_div_marker {
1689 list_item_lines.push(LineType::DivMarker(content));
1690 }
1691 // Check if this is a fence marker (opening or closing)
1692 // These should be treated as code block lines, not paragraph content
1693 else if is_fence_marker(&content) {
1694 list_item_lines.push(LineType::CodeBlock(content, indent));
1695 }
1696 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1697 else if is_semantic_line(&content) {
1698 list_item_lines.push(LineType::SemanticLine(content));
1699 }
1700 // Check if this is a snippet block delimiter (-8<- or --8<--)
1701 // These must be preserved on their own lines for MkDocs Snippets extension
1702 else if is_snippet_block_delimiter(&content) {
1703 list_item_lines.push(LineType::SnippetLine(content));
1704 }
1705 // Check if this is a GFM table row. Tables nested inside list
1706 // items must be preserved verbatim — joining them with prose
1707 // breaks the column structure.
1708 //
1709 // `is_potential_table_row` is intentionally permissive at the
1710 // row level: any line with `|` and 2+ cells qualifies. To avoid
1711 // misclassifying prose continuation lines that contain a literal
1712 // pipe (e.g. "use grep | sort to ..."), require one of:
1713 // - the row is pipe-bordered (`| ... |`), the canonical form
1714 // for tables nested in lists; or
1715 // - the next line is a delimiter row (this is a header); or
1716 // - the previous classified line was already a Table (this is
1717 // a continuation row).
1718 else if TableUtils::is_potential_table_row(&content) && {
1719 let pipe_bordered = content.trim().starts_with('|') && content.trim().ends_with('|');
1720 let next_is_delim = ctx
1721 .lines
1722 .get(i + 1)
1723 .is_some_and(|next| TableUtils::is_delimiter_row(next.content(ctx.content)));
1724 let prev_was_table = matches!(list_item_lines.last(), Some(LineType::Table(..)));
1725 pipe_bordered || next_is_delim || prev_was_table
1726 } {
1727 list_item_lines.push(LineType::Table(content, indent));
1728 } else {
1729 list_item_lines.push(LineType::Content(content));
1730 }
1731 i += 1;
1732 } else {
1733 // indent >= min_continuation_indent + 4: indented code block
1734 list_item_lines.push(LineType::CodeBlock(
1735 line_info.content(ctx.content)[indent..].to_string(),
1736 indent,
1737 ));
1738 i += 1;
1739 }
1740 } else {
1741 // Not indented enough, end of list item
1742 break;
1743 }
1744 }
1745
1746 // Determine the output continuation indent.
1747 // Normalize/Default modes canonicalize to min_continuation_indent
1748 // (fixing over-indented continuation). Semantic/SentencePerLine
1749 // modes preserve the user's actual indent since they only fix
1750 // line breaking, not indentation.
1751 let indent_size = match config.reflow_mode {
1752 ReflowMode::SemanticLineBreaks | ReflowMode::SentencePerLine => {
1753 // Find indent of the first plain text continuation line,
1754 // skipping the marker line (index 0), nested list items,
1755 // code blocks, and blank lines.
1756 list_item_lines
1757 .iter()
1758 .enumerate()
1759 .skip(1)
1760 .find_map(|(k, lt)| {
1761 if matches!(lt, LineType::Content(_)) {
1762 Some(ctx.lines[list_start + k].indent)
1763 } else {
1764 None
1765 }
1766 })
1767 .unwrap_or(min_continuation_indent)
1768 }
1769 _ => min_continuation_indent,
1770 };
1771 // For checkbox items in mkdocs flavor, enforce minimum indent so
1772 // continuation lines use the structural list indent (4), not the
1773 // content-aligned indent (6) which Python-Markdown doesn't support
1774 let has_checkbox = base_marker_len < marker_len;
1775 let indent_size = if has_checkbox && ctx.flavor.requires_strict_list_indent() {
1776 indent_size.max(min_continuation_indent)
1777 } else {
1778 indent_size
1779 };
1780 let expected_indent = " ".repeat(indent_size);
1781
1782 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
1783 let mut builder = BlockBuilder::new();
1784 for line in &list_item_lines {
1785 match line {
1786 LineType::Empty => builder.feed_blank_line(),
1787 LineType::Content(content) => builder.feed_content(content),
1788 LineType::CodeBlock(content, indent) => builder.feed_code_line(content, *indent),
1789 LineType::SemanticLine(content) => builder.feed_semantic_line(content),
1790 LineType::SnippetLine(content) => builder.feed_snippet_line(content),
1791 LineType::DivMarker(content) => builder.feed_div_marker(content),
1792 LineType::AdmonitionHeader(header_text, indent) => {
1793 builder.feed_admonition_header(header_text, *indent)
1794 }
1795 LineType::AdmonitionContent(content, indent) => {
1796 builder.feed_admonition_content(content, *indent)
1797 }
1798 LineType::Table(content, indent) => builder.feed_table_line(content, *indent),
1799 }
1800 }
1801 let blocks = builder.finalize();
1802
1803 // Helper: check if a line (raw source or stripped content) is exempt
1804 // from line-length checks. Link reference definitions are always exempt;
1805 // standalone link/image lines are exempt when strict mode is off.
1806 // Also checks content after stripping list markers, since list item
1807 // continuation lines may contain link ref defs.
1808 let is_exempt_line = |raw_line: &str| -> bool {
1809 let trimmed = raw_line.trim();
1810 // Link reference definitions: always exempt
1811 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
1812 return true;
1813 }
1814 // Also check after stripping list markers (for list item content)
1815 if is_list_item(trimmed) {
1816 let (_, content) = extract_list_marker_and_content(trimmed);
1817 let content_trimmed = content.trim();
1818 if content_trimmed.starts_with('[')
1819 && content_trimmed.contains("]:")
1820 && LINK_REF_PATTERN.is_match(content_trimmed)
1821 {
1822 return true;
1823 }
1824 }
1825 // Standalone link/image lines: exempt when not strict
1826 if !config.strict && is_standalone_link_or_image_line(raw_line) {
1827 return true;
1828 }
1829 // HTML-only lines: exempt when not strict
1830 if !config.strict && is_html_only_line(raw_line) {
1831 return true;
1832 }
1833 false
1834 };
1835
1836 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1837 // Exclude link reference definitions and standalone link lines from content
1838 // so they don't pollute combined_content or trigger false reflow.
1839 let content_lines: Vec<String> = list_item_lines
1840 .iter()
1841 .filter_map(|line| {
1842 if let LineType::Content(s) = line {
1843 if is_exempt_line(s) {
1844 return None;
1845 }
1846 Some(s.clone())
1847 } else {
1848 None
1849 }
1850 })
1851 .collect();
1852
1853 // Check if we need to reflow this list item
1854 // We check the combined content to see if it exceeds length limits
1855 let combined_content = content_lines.join(" ").trim().to_string();
1856
1857 // Helper to check if we should reflow in normalize mode
1858 let should_normalize = || {
1859 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1860 // DO normalize if it has plain text content that spans multiple lines
1861 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1862 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1863 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1864 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1865 let has_admonitions = blocks.iter().any(|b| matches!(b, Block::Admonition { .. }));
1866 let has_tables = blocks.iter().any(|b| matches!(b, Block::Table { .. }));
1867 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1868
1869 // If we have structural blocks but no paragraphs, don't normalize
1870 if (has_code_blocks
1871 || has_semantic_lines
1872 || has_snippet_lines
1873 || has_div_markers
1874 || has_admonitions
1875 || has_tables)
1876 && !has_paragraphs
1877 {
1878 return false;
1879 }
1880
1881 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1882 if has_paragraphs {
1883 // Count only paragraphs that contain at least one non-exempt line.
1884 // Paragraphs consisting entirely of link ref defs or standalone links
1885 // should not trigger normalization.
1886 let paragraph_count = blocks
1887 .iter()
1888 .filter(|b| {
1889 if let Block::Paragraph(para_lines) = b {
1890 !para_lines.iter().all(|line| is_exempt_line(line))
1891 } else {
1892 false
1893 }
1894 })
1895 .count();
1896 if paragraph_count > 1 {
1897 // Multiple non-exempt paragraph blocks should be normalized
1898 return true;
1899 }
1900
1901 // Single paragraph block: normalize if it has multiple content lines
1902 if content_lines.len() > 1 {
1903 return true;
1904 }
1905 }
1906
1907 false
1908 };
1909
1910 let needs_reflow = match config.reflow_mode {
1911 ReflowMode::Normalize => {
1912 // Only reflow if:
1913 // 1. Any non-exempt paragraph, when joined, exceeds the limit, OR
1914 // 2. Any admonition content line exceeds the limit, OR
1915 // 3. The list item should be normalized (has multi-line plain text)
1916 let any_paragraph_exceeds = blocks.iter().any(|block| match block {
1917 Block::Paragraph(para_lines) => {
1918 if para_lines.iter().all(|line| is_exempt_line(line)) {
1919 return false;
1920 }
1921 let joined = para_lines.join(" ");
1922 let with_marker = format!("{}{}", " ".repeat(indent_size), joined.trim());
1923 self.calculate_effective_length(&with_marker) > config.line_length.get()
1924 }
1925 Block::Admonition {
1926 content_lines,
1927 header_indent,
1928 ..
1929 } => content_lines.iter().any(|(content, indent)| {
1930 if content.is_empty() {
1931 return false;
1932 }
1933 let with_indent = format!("{}{}", " ".repeat(*indent.max(header_indent)), content);
1934 self.calculate_effective_length(&with_indent) > config.line_length.get()
1935 }),
1936 _ => false,
1937 });
1938 if any_paragraph_exceeds {
1939 true
1940 } else {
1941 should_normalize()
1942 }
1943 }
1944 ReflowMode::SentencePerLine => {
1945 // Check if list item has multiple sentences
1946 let sentences = split_into_sentences(&combined_content);
1947 sentences.len() > 1
1948 }
1949 ReflowMode::SemanticLineBreaks => {
1950 let sentences = split_into_sentences(&combined_content);
1951 sentences.len() > 1
1952 || (list_start..i).any(|line_idx| {
1953 let line = lines[line_idx];
1954 let trimmed = line.trim();
1955 if trimmed.is_empty() || is_exempt_line(line) {
1956 return false;
1957 }
1958 self.calculate_effective_length(line) > config.line_length.get()
1959 })
1960 }
1961 ReflowMode::Default => {
1962 // In default mode, only reflow if any individual non-exempt line exceeds limit
1963 (list_start..i).any(|line_idx| {
1964 let line = lines[line_idx];
1965 let trimmed = line.trim();
1966 // Skip blank lines and exempt lines
1967 if trimmed.is_empty() || is_exempt_line(line) {
1968 return false;
1969 }
1970 self.calculate_effective_length(line) > config.line_length.get()
1971 })
1972 }
1973 };
1974
1975 if needs_reflow {
1976 let start_range = line_index.whole_line_range(list_start + 1);
1977 let end_line = i - 1;
1978 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1979 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1980 } else {
1981 line_index.whole_line_range(end_line + 1)
1982 };
1983 let byte_range = start_range.start..end_range.end;
1984
1985 // Reflow each block (paragraphs only, preserve code blocks)
1986 // When line_length = 0 (no limit), use a very large value for reflow
1987 let reflow_line_length = if config.line_length.is_unlimited() {
1988 usize::MAX
1989 } else {
1990 config.line_length.get().saturating_sub(indent_size).max(1)
1991 };
1992 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1993 line_length: reflow_line_length,
1994 break_on_sentences: true,
1995 preserve_breaks: false,
1996 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1997 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1998 abbreviations: config.abbreviations_for_reflow(),
1999 length_mode: self.reflow_length_mode(),
2000 attr_lists: ctx.flavor.supports_attr_lists(),
2001 require_sentence_capital: config.require_sentence_capital,
2002 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2003 Some(4)
2004 } else {
2005 None
2006 },
2007 };
2008
2009 let mut result: Vec<String> = Vec::new();
2010 let mut is_first_block = true;
2011
2012 for (block_idx, block) in blocks.iter().enumerate() {
2013 match block {
2014 Block::Paragraph(para_lines) => {
2015 // If every line in this paragraph is exempt (link ref defs,
2016 // standalone links), preserve the paragraph verbatim instead
2017 // of reflowing it. Reflowing would corrupt link ref defs.
2018 let all_exempt = para_lines.iter().all(|line| is_exempt_line(line));
2019
2020 if all_exempt {
2021 for (idx, line) in para_lines.iter().enumerate() {
2022 if is_first_block && idx == 0 {
2023 result.push(format!("{marker}{line}"));
2024 is_first_block = false;
2025 } else {
2026 result.push(format!("{expected_indent}{line}"));
2027 }
2028 }
2029 } else {
2030 // Split the paragraph into segments at hard break boundaries
2031 // Each segment can be reflowed independently
2032 let segments = split_into_segments(para_lines);
2033
2034 for (segment_idx, segment) in segments.iter().enumerate() {
2035 // Check if this segment ends with a hard break and what type
2036 let hard_break_type = segment.last().and_then(|line| {
2037 let line = line.strip_suffix('\r').unwrap_or(line);
2038 if line.ends_with('\\') {
2039 Some("\\")
2040 } else if line.ends_with(" ") {
2041 Some(" ")
2042 } else {
2043 None
2044 }
2045 });
2046
2047 // Join and reflow the segment (removing the hard break marker for processing)
2048 let segment_for_reflow: Vec<String> = segment
2049 .iter()
2050 .map(|line| {
2051 // Strip hard break marker (2 spaces or backslash) for reflow processing
2052 if line.ends_with('\\') {
2053 line[..line.len() - 1].trim_end().to_string()
2054 } else if line.ends_with(" ") {
2055 line[..line.len() - 2].trim_end().to_string()
2056 } else {
2057 line.clone()
2058 }
2059 })
2060 .collect();
2061
2062 let segment_text = segment_for_reflow.join(" ").trim().to_string();
2063 if !segment_text.is_empty() {
2064 let reflowed =
2065 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
2066
2067 if is_first_block && segment_idx == 0 {
2068 // First segment of first block starts with marker
2069 result.push(format!("{marker}{}", reflowed[0]));
2070 for line in reflowed.iter().skip(1) {
2071 result.push(format!("{expected_indent}{line}"));
2072 }
2073 is_first_block = false;
2074 } else {
2075 // Subsequent segments
2076 for line in reflowed {
2077 result.push(format!("{expected_indent}{line}"));
2078 }
2079 }
2080
2081 // If this segment had a hard break, add it back to the last line
2082 // Preserve the original hard break format (backslash or two spaces)
2083 if let Some(break_marker) = hard_break_type
2084 && let Some(last_line) = result.last_mut()
2085 {
2086 last_line.push_str(break_marker);
2087 }
2088 }
2089 }
2090 }
2091
2092 // Add blank line after paragraph block if there's a next block.
2093 // Check if next block is a code block that doesn't want a preceding blank.
2094 // Also don't add blank lines before snippet lines (they should stay tight).
2095 // Only add if not already ending with one (avoids double blanks).
2096 if block_idx < blocks.len() - 1 {
2097 let next_block = &blocks[block_idx + 1];
2098 let should_add_blank = match next_block {
2099 Block::Code {
2100 has_preceding_blank, ..
2101 } => *has_preceding_blank,
2102 Block::Table {
2103 has_preceding_blank, ..
2104 } => *has_preceding_blank,
2105 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2106 _ => true, // For all other blocks, add blank line
2107 };
2108 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2109 result.push(String::new());
2110 }
2111 }
2112 }
2113 Block::Code {
2114 lines: code_lines,
2115 has_preceding_blank: _,
2116 } => {
2117 // Preserve code blocks as-is with original indentation
2118 // NOTE: Blank line before code block is handled by the previous block
2119 // (see paragraph block's logic above)
2120
2121 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
2122 if is_first_block && idx == 0 {
2123 // First line of first block gets marker
2124 result.push(format!(
2125 "{marker}{}",
2126 " ".repeat(orig_indent - marker_len) + content
2127 ));
2128 is_first_block = false;
2129 } else if content.is_empty() {
2130 result.push(String::new());
2131 } else {
2132 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
2133 }
2134 }
2135 }
2136 Block::SemanticLine(content) => {
2137 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
2138 // Only add blank before if not already ending with one.
2139 if !is_first_block && result.last().is_none_or(|s: &String| !s.is_empty()) {
2140 result.push(String::new());
2141 }
2142
2143 if is_first_block {
2144 // First block starts with marker
2145 result.push(format!("{marker}{content}"));
2146 is_first_block = false;
2147 } else {
2148 // Subsequent blocks use expected indent
2149 result.push(format!("{expected_indent}{content}"));
2150 }
2151
2152 // Add blank line after semantic line if there's a next block.
2153 // Only add if not already ending with one.
2154 if block_idx < blocks.len() - 1 {
2155 let next_block = &blocks[block_idx + 1];
2156 let should_add_blank = match next_block {
2157 Block::Code {
2158 has_preceding_blank, ..
2159 } => *has_preceding_blank,
2160 Block::Table {
2161 has_preceding_blank, ..
2162 } => *has_preceding_blank,
2163 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2164 _ => true, // For all other blocks, add blank line
2165 };
2166 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2167 result.push(String::new());
2168 }
2169 }
2170 }
2171 Block::SnippetLine(content) => {
2172 // Preserve snippet delimiters (-8<-) as-is on their own line
2173 // Unlike semantic lines, snippet lines don't add extra blank lines
2174 if is_first_block {
2175 // First block starts with marker
2176 result.push(format!("{marker}{content}"));
2177 is_first_block = false;
2178 } else {
2179 // Subsequent blocks use expected indent
2180 result.push(format!("{expected_indent}{content}"));
2181 }
2182 // No blank lines added before or after snippet delimiters
2183 }
2184 Block::DivMarker(content) => {
2185 // Preserve div markers (::: opening or closing) as-is on their own line
2186 if is_first_block {
2187 result.push(format!("{marker}{content}"));
2188 is_first_block = false;
2189 } else {
2190 result.push(format!("{expected_indent}{content}"));
2191 }
2192 }
2193 Block::Html {
2194 lines: html_lines,
2195 has_preceding_blank: _,
2196 } => {
2197 // Preserve HTML blocks exactly as-is with original indentation
2198 // NOTE: Blank line before HTML block is handled by the previous block
2199
2200 for (idx, line) in html_lines.iter().enumerate() {
2201 if is_first_block && idx == 0 {
2202 // First line of first block gets marker
2203 result.push(format!("{marker}{line}"));
2204 is_first_block = false;
2205 } else if line.is_empty() {
2206 // Preserve blank lines inside HTML blocks
2207 result.push(String::new());
2208 } else {
2209 // Preserve lines with their original content (already includes indentation)
2210 result.push(format!("{expected_indent}{line}"));
2211 }
2212 }
2213
2214 // Add blank line after HTML block if there's a next block.
2215 // Only add if not already ending with one (avoids double blanks
2216 // when the HTML block itself contained a trailing blank line).
2217 if block_idx < blocks.len() - 1 {
2218 let next_block = &blocks[block_idx + 1];
2219 let should_add_blank = match next_block {
2220 Block::Code {
2221 has_preceding_blank, ..
2222 } => *has_preceding_blank,
2223 Block::Html {
2224 has_preceding_blank, ..
2225 } => *has_preceding_blank,
2226 Block::Table {
2227 has_preceding_blank, ..
2228 } => *has_preceding_blank,
2229 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2230 _ => true, // For all other blocks, add blank line
2231 };
2232 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2233 result.push(String::new());
2234 }
2235 }
2236 }
2237 Block::Table {
2238 lines: table_lines,
2239 has_preceding_blank: _,
2240 } => {
2241 // Preserve table rows verbatim with their original indentation.
2242 // Reflowing rows would corrupt column alignment and inject `|`
2243 // characters mid-paragraph (issue #590).
2244 // The leading blank line is emitted by the previous block.
2245 for (idx, (content, orig_indent)) in table_lines.iter().enumerate() {
2246 if is_first_block && idx == 0 {
2247 // First line of first block gets the list marker
2248 result.push(format!(
2249 "{marker}{}",
2250 " ".repeat(orig_indent.saturating_sub(marker_len)) + content
2251 ));
2252 is_first_block = false;
2253 } else {
2254 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
2255 }
2256 }
2257
2258 // Add blank line after table block if there's a next block.
2259 if block_idx < blocks.len() - 1 {
2260 let next_block = &blocks[block_idx + 1];
2261 let should_add_blank = match next_block {
2262 Block::Code {
2263 has_preceding_blank, ..
2264 } => *has_preceding_blank,
2265 Block::Html {
2266 has_preceding_blank, ..
2267 } => *has_preceding_blank,
2268 Block::Table {
2269 has_preceding_blank, ..
2270 } => *has_preceding_blank,
2271 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2272 _ => true,
2273 };
2274 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2275 result.push(String::new());
2276 }
2277 }
2278 }
2279 Block::Admonition {
2280 header,
2281 header_indent,
2282 content_lines: admon_lines,
2283 } => {
2284 // Reconstruct admonition block with header at original indent
2285 // and body content reflowed to fit within the line length limit
2286
2287 // Add blank line before admonition if not first block
2288 if !is_first_block && result.last().is_none_or(|s: &String| !s.is_empty()) {
2289 result.push(String::new());
2290 }
2291
2292 // Output the header at its original indent
2293 let header_indent_str = " ".repeat(*header_indent);
2294 if is_first_block {
2295 result.push(format!(
2296 "{marker}{}",
2297 " ".repeat(header_indent.saturating_sub(marker_len)) + header
2298 ));
2299 is_first_block = false;
2300 } else {
2301 result.push(format!("{header_indent_str}{header}"));
2302 }
2303
2304 // Derive body indent from the first non-empty content line's
2305 // stored indent, falling back to header_indent + 4 for
2306 // empty-body admonitions
2307 let body_indent = admon_lines
2308 .iter()
2309 .find(|(content, _)| !content.is_empty())
2310 .map_or(header_indent + 4, |(_, indent)| *indent);
2311 let body_indent_str = " ".repeat(body_indent);
2312
2313 // Segment body content into code blocks (verbatim) and
2314 // text paragraphs (reflowable), separated by blank lines.
2315 // Code lines store (content, orig_indent) to reconstruct
2316 // internal indentation relative to body_indent.
// One run of admonition body lines, classified by how it is written back out.
2317 enum AdmonSegment {
// Consecutive non-empty body lines outside any fence. A blank line or an
// opening fence ends the run; on output the lines are joined with spaces
// and reflowed.
2318 Text(Vec<String>),
// A fenced code block, from its opening fence through the matching closing
// fence (or the end of the body if it is never closed), stored as
// (content, original indent) pairs and emitted without reflowing.
2319 Code(Vec<(String, usize)>),
2320 }
2321
2322 let mut segments: Vec<AdmonSegment> = Vec::new();
2323 let mut current_text: Vec<String> = Vec::new();
2324 let mut current_code: Vec<(String, usize)> = Vec::new();
2325 let mut in_admon_code = false;
2326 // Track the opening fence character so closing fences
2327 // must match (backticks close backticks, tildes close tildes)
2328 let mut fence_char: char = '`';
2329
2330 // Opening fences: ``` or ~~~ followed by optional info string
2331 let get_opening_fence = |s: &str| -> Option<(char, usize)> {
2332 let t = s.trim_start();
2333 if t.starts_with("```") {
2334 Some(('`', t.bytes().take_while(|&b| b == b'`').count()))
2335 } else if t.starts_with("~~~") {
2336 Some(('~', t.bytes().take_while(|&b| b == b'~').count()))
2337 } else {
2338 None
2339 }
2340 };
2341 // Closing fences: ONLY fence chars + optional trailing spaces
2342 let get_closing_fence = |s: &str| -> Option<(char, usize)> {
2343 let t = s.trim();
2344 if t.starts_with("```") && t.bytes().all(|b| b == b'`') {
2345 Some(('`', t.len()))
2346 } else if t.starts_with("~~~") && t.bytes().all(|b| b == b'~') {
2347 Some(('~', t.len()))
2348 } else {
2349 None
2350 }
2351 };
2352 let mut fence_len: usize = 3;
2353
2354 for (content, orig_indent) in admon_lines {
2355 if in_admon_code {
2356 // Closing fence must use the same character, be
2357 // at least as long, and have no info string
2358 if let Some((ch, len)) = get_closing_fence(content)
2359 && ch == fence_char
2360 && len >= fence_len
2361 {
2362 current_code.push((content.clone(), *orig_indent));
2363 in_admon_code = false;
2364 segments.push(AdmonSegment::Code(std::mem::take(&mut current_code)));
2365 continue;
2366 }
2367 current_code.push((content.clone(), *orig_indent));
2368 } else if let Some((ch, len)) = get_opening_fence(content) {
2369 if !current_text.is_empty() {
2370 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2371 }
2372 in_admon_code = true;
2373 fence_char = ch;
2374 fence_len = len;
2375 current_code.push((content.clone(), *orig_indent));
2376 } else if content.is_empty() {
2377 if !current_text.is_empty() {
2378 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2379 }
2380 } else {
2381 current_text.push(content.clone());
2382 }
2383 }
2384 if in_admon_code && !current_code.is_empty() {
2385 segments.push(AdmonSegment::Code(std::mem::take(&mut current_code)));
2386 }
2387 if !current_text.is_empty() {
2388 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2389 }
2390
2391 // Build reflow options once for all text segments
2392 let admon_reflow_length = if config.line_length.is_unlimited() {
2393 usize::MAX
2394 } else {
2395 config.line_length.get().saturating_sub(body_indent).max(1)
2396 };
2397
2398 let admon_reflow_options = crate::utils::text_reflow::ReflowOptions {
2399 line_length: admon_reflow_length,
2400 break_on_sentences: true,
2401 preserve_breaks: false,
2402 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2403 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2404 abbreviations: config.abbreviations_for_reflow(),
2405 length_mode: self.reflow_length_mode(),
2406 attr_lists: ctx.flavor.supports_attr_lists(),
2407 require_sentence_capital: config.require_sentence_capital,
2408 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2409 Some(4)
2410 } else {
2411 None
2412 },
2413 };
2414
2415 // Output each segment
2416 for segment in &segments {
2417 // Blank line before each segment (after the header or previous segment)
2418 result.push(String::new());
2419
2420 match segment {
2421 AdmonSegment::Code(lines) => {
2422 for (line, orig_indent) in lines {
2423 if line.is_empty() {
2424 // Preserve blank lines inside code blocks
2425 result.push(String::new());
2426 } else {
2427 // Reconstruct with body_indent + any extra
2428 // indentation the line had beyond body_indent
2429 let extra = orig_indent.saturating_sub(body_indent);
2430 let indent_str = " ".repeat(body_indent + extra);
2431 result.push(format!("{indent_str}{line}"));
2432 }
2433 }
2434 }
2435 AdmonSegment::Text(lines) => {
2436 let paragraph_text = lines.join(" ").trim().to_string();
2437 if paragraph_text.is_empty() {
2438 continue;
2439 }
2440 let reflowed = crate::utils::text_reflow::reflow_line(
2441 ¶graph_text,
2442 &admon_reflow_options,
2443 );
2444 for line in &reflowed {
2445 result.push(format!("{body_indent_str}{line}"));
2446 }
2447 }
2448 }
2449 }
2450
2451 // Add blank line after admonition if there's a next block
2452 if block_idx < blocks.len() - 1 {
2453 let next_block = &blocks[block_idx + 1];
2454 let should_add_blank = match next_block {
2455 Block::Code {
2456 has_preceding_blank, ..
2457 } => *has_preceding_blank,
2458 Block::Table {
2459 has_preceding_blank, ..
2460 } => *has_preceding_blank,
2461 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2462 _ => true,
2463 };
2464 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2465 result.push(String::new());
2466 }
2467 }
2468 }
2469 }
2470 }
2471
2472 let reflowed_text = result.join(line_ending);
2473
2474 // Preserve trailing newline
2475 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2476 format!("{reflowed_text}{line_ending}")
2477 } else {
2478 reflowed_text
2479 };
2480
2481 // Get the original text to compare
2482 let original_text = &ctx.content[byte_range.clone()];
2483
2484 // Physical-line-length scan, shared by the Normalize-mode gate and its
2485 // message. The list-item reflow preserves code blocks, HTML blocks,
2486 // admonition headers, fence markers, semantic markers, and snippet/div
2487 // markers verbatim; only paragraph content and admonition bodies are
2488 // restructured. Only those lines drive the length warning, so that
2489 // preserved-but-overlong content does not keep the paragraph-level
2490 // warning alive when the reflow would not fix that line.
2491 let should_count_for_length = |line_idx: usize| -> bool {
2492 let line = lines[line_idx];
2493 let trimmed = line.trim();
2494 if trimmed.is_empty() || is_exempt_line(line) {
2495 return false;
2496 }
2497 let info = &ctx.lines[line_idx];
2498 if info.in_code_block || info.in_html_block {
2499 return false;
2500 }
2501 if info.in_admonition && mkdocs_admonitions::is_admonition_start(line) {
2502 return false;
2503 }
2504 if is_fence_marker(line) || is_semantic_line(line) {
2505 return false;
2506 }
2507 if is_snippet_block_delimiter(line) {
2508 return false;
2509 }
2510 if line.trim_start().starts_with(":::") {
2511 return false;
2512 }
2513 true
2514 };
2515 let max_physical_length = (list_start..i)
2516 .filter(|&idx| should_count_for_length(idx))
2517 .map(|idx| self.calculate_effective_length(lines[idx]))
2518 .max()
2519 .unwrap_or(0);
2520 // `line-length = 0` means "no limit" — never emit a length warning in
2521 // that configuration, even when the reflow would restructure content.
2522 let any_paragraph_line_over =
2523 !config.line_length.is_unlimited() && max_physical_length > config.line_length.get();
2524
2525 // Only generate a warning if the replacement is different from the original.
2526 // For Normalize mode, additionally require that at least one physical
2527 // content line exceeds the limit — otherwise the reflow is a cosmetic
2528 // change (continuation-indent normalization or joining/splitting
2529 // already-fitting multi-line paragraphs) and should not be reported as
2530 // a length violation. SentencePerLine and SemanticLineBreaks modes warn
2531 // on sentence structure, not length, so they ignore this gate.
2532 let gate_ok = match config.reflow_mode {
2533 ReflowMode::Normalize => original_text != replacement && any_paragraph_line_over,
2534 _ => original_text != replacement,
2535 };
2536 if gate_ok {
2537 // Generate an appropriate message based on why reflow is needed
2538 let message = match config.reflow_mode {
2539 ReflowMode::SentencePerLine => {
2540 let num_sentences = split_into_sentences(&combined_content).len();
2541 let num_lines = content_lines.len();
2542 if num_lines == 1 {
2543 // Single line with multiple sentences
2544 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2545 } else {
2546 // Multiple lines - could be split sentences or mixed
2547 format!(
2548 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
2549 )
2550 }
2551 }
2552 ReflowMode::SemanticLineBreaks => {
2553 let num_sentences = split_into_sentences(&combined_content).len();
2554 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
2555 }
2556 ReflowMode::Normalize => {
2557 // Report the physical line length of the longest over-limit
2558 // content line. The gate above guarantees at least one.
2559 format!(
2560 "Line length {} exceeds {} characters",
2561 max_physical_length,
2562 config.line_length.get()
2563 )
2564 }
2565 ReflowMode::Default => {
2566 // Report the actual longest non-exempt line, not the combined content
2567 let max_length = (list_start..i)
2568 .filter(|&line_idx| {
2569 let line = lines[line_idx];
2570 let trimmed = line.trim();
2571 !trimmed.is_empty() && !is_exempt_line(line)
2572 })
2573 .map(|line_idx| self.calculate_effective_length(lines[line_idx]))
2574 .max()
2575 .unwrap_or(0);
2576 format!(
2577 "Line length {} exceeds {} characters",
2578 max_length,
2579 config.line_length.get()
2580 )
2581 }
2582 };
2583
2584 warnings.push(LintWarning {
2585 rule_name: Some(self.name().to_string()),
2586 message,
2587 line: list_start + 1,
2588 column: 1,
2589 end_line: end_line + 1,
2590 end_column: lines[end_line].len() + 1,
2591 severity: Severity::Warning,
2592 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
2593 });
2594 }
2595 }
2596 continue;
2597 }
2598
2599 // Found start of a paragraph - collect all lines in it
2600 let paragraph_start = i;
2601 let mut paragraph_lines = vec![lines[i]];
2602 i += 1;
2603
2604 while i < lines.len() {
2605 let next_line = lines[i];
2606 let next_line_num = i + 1;
2607 let next_trimmed = next_line.trim();
2608
2609 // Stop at paragraph boundaries
2610 if next_trimmed.is_empty()
2611 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
2612 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
2613 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
2614 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
2615 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
2616 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
2617 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_block)
2618 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
2619 || ctx
2620 .line_info(next_line_num)
2621 .is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container)
2622 || (next_line_num > 0
2623 && next_line_num <= ctx.lines.len()
2624 && ctx.lines[next_line_num - 1].blockquote.is_some())
2625 || next_trimmed.starts_with('#')
2626 || TableUtils::is_potential_table_row(next_line)
2627 || is_list_item(next_trimmed)
2628 || is_horizontal_rule(next_line)
2629 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
2630 || is_template_directive_only(next_line)
2631 || is_standalone_attr_list(next_line)
2632 || is_snippet_block_delimiter(next_line)
2633 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
2634 || is_html_only_line(next_line)
2635 {
2636 break;
2637 }
2638
2639 // Check if the previous line ends with a hard break (2+ spaces or backslash)
2640 if i > 0 && has_hard_break(lines[i - 1]) {
2641 // Don't include lines after hard breaks in the same paragraph
2642 break;
2643 }
2644
2645 paragraph_lines.push(next_line);
2646 i += 1;
2647 }
2648
2649 // Compute the common leading indent of all non-empty paragraph lines,
2650 // but only when those lines are structurally inside a list block.
2651 // Indented continuation lines that follow a nested list arrive here
2652 // with their structural indentation intact (e.g. 2 spaces for a
2653 // top-level list item). Stripping the indent before reflow and
2654 // re-applying it afterward prevents the fixer from moving those
2655 // lines to column 0.
2656 //
2657 // The list-block guard is essential: top-level paragraphs that happen
2658 // to start with spaces (insignificant in Markdown) must NOT have those
2659 // spaces preserved or injected by the fixer.
2660 let common_indent: String = if ctx.is_in_list_block(paragraph_start + 1) {
2661 let min_len = paragraph_lines
2662 .iter()
2663 .filter(|l| !l.trim().is_empty())
2664 .map(|l| l.len() - l.trim_start().len())
2665 .min()
2666 .unwrap_or(0);
2667 paragraph_lines
2668 .iter()
2669 .find(|l| !l.trim().is_empty())
2670 .map(|l| l[..min_len].to_string())
2671 .unwrap_or_default()
2672 } else {
2673 String::new()
2674 };
2675
2676 // Combine paragraph lines into a single string for processing.
2677 // This must be done BEFORE the needs_reflow check for sentence-per-line mode.
2678 let paragraph_text = if common_indent.is_empty() {
2679 paragraph_lines.join(" ")
2680 } else {
2681 paragraph_lines
2682 .iter()
2683 .map(|l| {
2684 if l.starts_with(common_indent.as_str()) {
2685 &l[common_indent.len()..]
2686 } else {
2687 l.trim_start()
2688 }
2689 })
2690 .collect::<Vec<_>>()
2691 .join(" ")
2692 };
2693
2694 // Skip reflowing if this paragraph contains definition list items
2695 // Definition lists are multi-line structures that should not be joined
2696 let contains_definition_list = paragraph_lines
2697 .iter()
2698 .any(|line| crate::utils::is_definition_list_item(line));
2699
2700 if contains_definition_list {
2701 // Don't reflow definition lists - skip this paragraph
2702 i = paragraph_start + paragraph_lines.len();
2703 continue;
2704 }
2705
2706 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2707 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2708 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2709
2710 if contains_snippets {
2711 // Don't reflow Snippets blocks - skip this paragraph
2712 i = paragraph_start + paragraph_lines.len();
2713 continue;
2714 }
2715
2716 // Check if this paragraph needs reflowing
2717 let needs_reflow = match config.reflow_mode {
2718 ReflowMode::Normalize => self.normalize_mode_needs_reflow(paragraph_lines.iter().copied(), config),
2719 ReflowMode::SentencePerLine => {
2720 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
2721 // Note: we check the joined text because sentences can span multiple lines
2722 let sentences = split_into_sentences(¶graph_text);
2723
2724 // Always reflow if multiple sentences on one line
2725 if sentences.len() > 1 {
2726 true
2727 } else if paragraph_lines.len() > 1 {
2728 // For single-sentence paragraphs spanning multiple lines:
2729 // Reflow if they COULD fit on one line (respecting line-length constraint)
2730 if config.line_length.is_unlimited() {
2731 // No line-length constraint - always join single sentences
2732 true
2733 } else {
2734 // Only join if it fits within line-length.
2735 // paragraph_text has the common indent stripped, so add it
2736 // back to get the true output length before comparing.
2737 let effective_length =
2738 self.calculate_effective_length(¶graph_text) + common_indent.len();
2739 effective_length <= config.line_length.get()
2740 }
2741 } else {
2742 false
2743 }
2744 }
2745 ReflowMode::SemanticLineBreaks => {
2746 let sentences = split_into_sentences(¶graph_text);
2747 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
2748 sentences.len() > 1
2749 || paragraph_lines.len() > 1
2750 || paragraph_lines
2751 .iter()
2752 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2753 }
2754 ReflowMode::Default => {
2755 // In default mode, only reflow if lines exceed limit
2756 paragraph_lines
2757 .iter()
2758 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2759 }
2760 };
2761
2762 if needs_reflow {
2763 // Calculate byte range for this paragraph
2764 // Use whole_line_range for each line and combine
2765 let start_range = line_index.whole_line_range(paragraph_start + 1);
2766 let end_line = paragraph_start + paragraph_lines.len() - 1;
2767
2768 // For the last line, we want to preserve any trailing newline
2769 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2770 // Last line without trailing newline - use line_text_range
2771 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2772 } else {
2773 // Not the last line or has trailing newline - use whole_line_range
2774 line_index.whole_line_range(end_line + 1)
2775 };
2776
2777 let byte_range = start_range.start..end_range.end;
2778
2779 // Check if the paragraph ends with a hard break and what type
2780 let hard_break_type = paragraph_lines.last().and_then(|line| {
2781 let line = line.strip_suffix('\r').unwrap_or(line);
2782 if line.ends_with('\\') {
2783 Some("\\")
2784 } else if line.ends_with(" ") {
2785 Some(" ")
2786 } else {
2787 None
2788 }
2789 });
2790
2791 // Reflow the paragraph
2792 // When line_length = 0 (no limit), use a very large value for reflow
2793 let reflow_line_length = if config.line_length.is_unlimited() {
2794 usize::MAX
2795 } else {
2796 config.line_length.get()
2797 };
2798 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2799 line_length: reflow_line_length,
2800 break_on_sentences: true,
2801 preserve_breaks: false,
2802 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2803 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2804 abbreviations: config.abbreviations_for_reflow(),
2805 length_mode: self.reflow_length_mode(),
2806 attr_lists: ctx.flavor.supports_attr_lists(),
2807 require_sentence_capital: config.require_sentence_capital,
2808 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2809 Some(4)
2810 } else {
2811 None
2812 },
2813 };
2814 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
2815
2816 // Re-apply the common indent to each non-empty reflowed line so
2817 // that the replacement preserves the original structural indentation.
2818 if !common_indent.is_empty() {
2819 for line in &mut reflowed {
2820 if !line.is_empty() {
2821 *line = format!("{common_indent}{line}");
2822 }
2823 }
2824 }
2825
2826 // If the original paragraph ended with a hard break, preserve it
2827 // Preserve the original hard break format (backslash or two spaces)
2828 if let Some(break_marker) = hard_break_type
2829 && !reflowed.is_empty()
2830 {
2831 let last_idx = reflowed.len() - 1;
2832 if !has_hard_break(&reflowed[last_idx]) {
2833 reflowed[last_idx].push_str(break_marker);
2834 }
2835 }
2836
2837 let reflowed_text = reflowed.join(line_ending);
2838
2839 // Preserve trailing newline if the original paragraph had one
2840 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2841 format!("{reflowed_text}{line_ending}")
2842 } else {
2843 reflowed_text
2844 };
2845
2846 // Get the original text to compare
2847 let original_text = &ctx.content[byte_range.clone()];
2848
2849 // Only generate a warning if the replacement is different from the original
2850 if original_text != replacement {
2851 // Create warning with actual fix
2852 // In default mode, report the specific line that violates
2853 // In normalize mode, report the whole paragraph
2854 // In sentence-per-line mode, report the entire paragraph
2855 let (warning_line, warning_end_line) = match config.reflow_mode {
2856 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
2857 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
2858 // Highlight the entire paragraph that needs reformatting
2859 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
2860 }
2861 ReflowMode::Default => {
2862 // Find the first line that exceeds the limit
2863 let mut violating_line = paragraph_start;
2864 for (idx, line) in paragraph_lines.iter().enumerate() {
2865 if self.calculate_effective_length(line) > config.line_length.get() {
2866 violating_line = paragraph_start + idx;
2867 break;
2868 }
2869 }
2870 (violating_line + 1, violating_line + 1)
2871 }
2872 };
2873
2874 warnings.push(LintWarning {
2875 rule_name: Some(self.name().to_string()),
2876 message: match config.reflow_mode {
2877 ReflowMode::Normalize => format!(
2878 "Paragraph could be normalized to use line length of {} characters",
2879 config.line_length.get()
2880 ),
2881 ReflowMode::SentencePerLine => {
2882 let num_sentences = split_into_sentences(¶graph_text).len();
2883 if paragraph_lines.len() == 1 {
2884 // Single line with multiple sentences
2885 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2886 } else {
2887 let num_lines = paragraph_lines.len();
2888 // Multiple lines - could be split sentences or mixed
2889 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
2890 }
2891 },
2892 ReflowMode::SemanticLineBreaks => {
2893 let num_sentences = split_into_sentences(¶graph_text).len();
2894 format!(
2895 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
2896 )
2897 },
2898 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
2899 },
2900 line: warning_line,
2901 column: 1,
2902 end_line: warning_end_line,
2903 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
2904 severity: Severity::Warning,
2905 fix: Some(crate::rule::Fix::new(byte_range, replacement)),
2906 });
2907 }
2908 }
2909 }
2910
2911 warnings
2912 }
2913
2914 /// Calculate string length based on the configured length mode
2915 fn calculate_string_length(&self, s: &str) -> usize {
2916 match self.config.length_mode {
2917 LengthMode::Chars => s.chars().count(),
2918 LengthMode::Visual => s.width(),
2919 LengthMode::Bytes => s.len(),
2920 }
2921 }
2922
2923 /// Calculate effective line length
2924 ///
2925 /// Returns the actual display length of the line using the configured length mode.
2926 fn calculate_effective_length(&self, line: &str) -> usize {
2927 self.calculate_string_length(line)
2928 }
2929
2930 /// Calculate line length with inline link/image URLs removed.
2931 ///
2932 /// For each inline link `[text](url)` or image `` on the line,
2933 /// computes the "savings" from removing the URL portion (keeping only `[text]`
2934 /// or `![alt]`). Returns `effective_length - total_savings`.
2935 ///
2936 /// Handles nested constructs (e.g., `[](url)`) by only counting the
2937 /// outermost construct to avoid double-counting.
2938 fn calculate_text_only_length(
2939 &self,
2940 effective_length: usize,
2941 line_number: usize,
2942 ctx: &crate::lint_context::LintContext,
2943 ) -> usize {
2944 let line_range = ctx.line_index.line_content_range(line_number);
2945 let line_byte_end = line_range.end;
2946
2947 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
2948 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
2949
2950 // Binary search: links are sorted by byte_offset, so link.line is non-decreasing
2951 let link_start = ctx.links.partition_point(|l| l.line < line_number);
2952 for link in &ctx.links[link_start..] {
2953 if link.line != line_number {
2954 break;
2955 }
2956 if link.is_reference {
2957 continue;
2958 }
2959 if !matches!(link.link_type, LinkType::Inline) {
2960 continue;
2961 }
2962 if link.byte_end > line_byte_end {
2963 continue;
2964 }
2965 let text_only_len = 2 + self.calculate_string_length(&link.text);
2966 constructs.push((link.byte_offset, link.byte_end, text_only_len));
2967 }
2968
2969 let img_start = ctx.images.partition_point(|i| i.line < line_number);
2970 for image in &ctx.images[img_start..] {
2971 if image.line != line_number {
2972 break;
2973 }
2974 if image.is_reference {
2975 continue;
2976 }
2977 if !matches!(image.link_type, LinkType::Inline) {
2978 continue;
2979 }
2980 if image.byte_end > line_byte_end {
2981 continue;
2982 }
2983 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
2984 constructs.push((image.byte_offset, image.byte_end, text_only_len));
2985 }
2986
2987 if constructs.is_empty() {
2988 return effective_length;
2989 }
2990
2991 // Sort by byte offset to handle overlapping/nested constructs
2992 constructs.sort_by_key(|&(start, _, _)| start);
2993
2994 let mut total_savings: usize = 0;
2995 let mut last_end: usize = 0;
2996
2997 for (start, end, text_only_len) in &constructs {
2998 // Skip constructs nested inside a previously counted one
2999 if *start < last_end {
3000 continue;
3001 }
3002 // Full construct length in configured length mode
3003 let full_source = &ctx.content[*start..*end];
3004 let full_len = self.calculate_string_length(full_source);
3005 total_savings += full_len.saturating_sub(*text_only_len);
3006 last_end = *end;
3007 }
3008
3009 effective_length.saturating_sub(total_savings)
3010 }
3011}