rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod helpers;
22pub mod md013_config;
23use crate::utils::is_template_directive_only;
24use helpers::{
25 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_html_only_line,
26 is_list_item, is_standalone_link_or_image_line, split_into_segments, trim_preserving_hard_break,
27};
28pub use md013_config::MD013Config;
29use md013_config::{LengthMode, ReflowMode};
30
31#[cfg(test)]
32mod tests;
33use unicode_width::UnicodeWidthStr;
34
/// Rule MD013 (line length). The rule's only state is its configuration.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings consulted by every check; visible crate-wide.
    pub(crate) config: MD013Config,
}
39
/// Blockquote paragraph line collected for reflow, with original line index for range computation.
struct CollectedBlockquoteLine {
    /// Zero-based index of this line in the raw `lines` slice.
    line_idx: usize,
    /// Per-line data (content plus explicit/lazy prefix information) passed to the
    /// blockquote reflow helpers.
    data: BlockquoteLineData,
}
45
46impl MD013LineLength {
47 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
48 Self {
49 config: MD013Config {
50 line_length: crate::types::LineLength::new(line_length),
51 code_blocks,
52 tables,
53 headings,
54 paragraphs: true, // Default to true for backwards compatibility
55 blockquotes: true, // Default to true for backwards compatibility
56 strict,
57 reflow: false,
58 reflow_mode: ReflowMode::default(),
59 length_mode: LengthMode::default(),
60 abbreviations: Vec::new(),
61 require_sentence_capital: true,
62 },
63 }
64 }
65
    /// Wrap an already-built [`MD013Config`] in a rule instance, storing it unchanged.
    pub fn from_config_struct(config: MD013Config) -> Self {
        Self { config }
    }
69
70 /// Return a clone with code block checking disabled.
71 /// Used for doc comment linting where code blocks are Rust code managed by rustfmt.
72 pub fn with_code_blocks_disabled(&self) -> Self {
73 let mut clone = self.clone();
74 clone.config.code_blocks = false;
75 clone
76 }
77
    /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
    ///
    /// Each variant maps to the variant of the same name; the match is exhaustive, so a
    /// newly added `LengthMode` variant forces this function to be updated.
    fn reflow_length_mode(&self) -> ReflowLengthMode {
        match self.config.length_mode {
            LengthMode::Chars => ReflowLengthMode::Chars,
            LengthMode::Visual => ReflowLengthMode::Visual,
            LengthMode::Bytes => ReflowLengthMode::Bytes,
        }
    }
86
87 fn should_ignore_line(
88 &self,
89 line: &str,
90 _lines: &[&str],
91 current_line: usize,
92 ctx: &crate::lint_context::LintContext,
93 ) -> bool {
94 if self.config.strict {
95 return false;
96 }
97
98 // Quick check for common patterns before expensive regex
99 let trimmed = line.trim();
100
101 // Only skip if the entire line is a URL (quick check first)
102 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
103 return true;
104 }
105
106 // Only skip if the entire line is an image reference (quick check first)
107 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
108 return true;
109 }
110
111 // Note: link reference definitions are handled as always-exempt (even in strict mode)
112 // in the main check loop, so they don't need to be checked here.
113
114 // Code blocks with long strings (only check if in code block)
115 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
116 && !trimmed.is_empty()
117 && !line.contains(' ')
118 && !line.contains('\t')
119 {
120 return true;
121 }
122
123 false
124 }
125
126 /// Check if rule should skip based on provided config (used for inline config support)
127 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
128 // Skip if content is empty
129 if ctx.content.is_empty() {
130 return true;
131 }
132
133 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
134 if config.reflow
135 && (config.reflow_mode == ReflowMode::SentencePerLine
136 || config.reflow_mode == ReflowMode::SemanticLineBreaks
137 || config.reflow_mode == ReflowMode::Normalize)
138 {
139 return false;
140 }
141
142 // Quick check: if total content is shorter than line limit, definitely skip
143 if ctx.content.len() <= config.line_length.get() {
144 return true;
145 }
146
147 // Skip if no line exceeds the limit
148 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
149 }
150
151 fn normalize_mode_needs_reflow<'a, I>(&self, lines: I, config: &MD013Config) -> bool
152 where
153 I: IntoIterator<Item = &'a str>,
154 {
155 let mut line_count = 0;
156 let check_length = !config.line_length.is_unlimited();
157
158 for line in lines {
159 line_count += 1;
160 if check_length && self.calculate_effective_length(line) > config.line_length.get() {
161 return true;
162 }
163 }
164
165 line_count > 1
166 }
167}
168
169impl Rule for MD013LineLength {
    /// Identifier of this rule: `"MD013"`.
    fn name(&self) -> &'static str {
        "MD013"
    }
173
    /// One-line human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Line length should not be excessive"
    }
177
178 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
179 // Use pre-parsed inline config from LintContext
180 let config_override = ctx.inline_config().get_rule_config("MD013");
181
182 // Apply configuration override if present
183 let effective_config = if let Some(json_config) = config_override {
184 if let Some(obj) = json_config.as_object() {
185 let mut config = self.config.clone();
186 if let Some(line_length) = obj.get("line_length").and_then(serde_json::Value::as_u64) {
187 config.line_length = crate::types::LineLength::new(line_length as usize);
188 }
189 if let Some(code_blocks) = obj.get("code_blocks").and_then(serde_json::Value::as_bool) {
190 config.code_blocks = code_blocks;
191 }
192 if let Some(tables) = obj.get("tables").and_then(serde_json::Value::as_bool) {
193 config.tables = tables;
194 }
195 if let Some(headings) = obj.get("headings").and_then(serde_json::Value::as_bool) {
196 config.headings = headings;
197 }
198 if let Some(blockquotes) = obj.get("blockquotes").and_then(serde_json::Value::as_bool) {
199 config.blockquotes = blockquotes;
200 }
201 if let Some(strict) = obj.get("strict").and_then(serde_json::Value::as_bool) {
202 config.strict = strict;
203 }
204 if let Some(reflow) = obj.get("reflow").and_then(serde_json::Value::as_bool) {
205 config.reflow = reflow;
206 }
207 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
208 config.reflow_mode = match reflow_mode {
209 "default" => ReflowMode::Default,
210 "normalize" => ReflowMode::Normalize,
211 "sentence-per-line" => ReflowMode::SentencePerLine,
212 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
213 _ => ReflowMode::default(),
214 };
215 }
216 config
217 } else {
218 self.config.clone()
219 }
220 } else {
221 self.config.clone()
222 };
223
224 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
225 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
226 if self.should_skip_with_config(ctx, &effective_config)
227 && !(effective_config.reflow
228 && (effective_config.reflow_mode == ReflowMode::Normalize
229 || effective_config.reflow_mode == ReflowMode::SentencePerLine
230 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
231 {
232 return Ok(Vec::new());
233 }
234
235 // Direct implementation without DocumentStructure
236 let mut warnings = Vec::new();
237
238 // Special handling: line_length = 0 means "no line length limit"
239 // Skip all line length checks, but still allow reflow if enabled
240 let skip_length_checks = effective_config.line_length.is_unlimited();
241
242 // Pre-filter lines that could be problematic to avoid processing all lines
243 let mut candidate_lines = Vec::new();
244 if !skip_length_checks {
245 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
246 // Skip front matter - it should never be linted
247 if line_info.in_front_matter {
248 continue;
249 }
250
251 // Quick length check first
252 if line_info.byte_len > effective_config.line_length.get() {
253 candidate_lines.push(line_idx);
254 }
255 }
256 }
257
258 // If no candidate lines and not in normalize or sentence-per-line mode, early return
259 if candidate_lines.is_empty()
260 && !(effective_config.reflow
261 && (effective_config.reflow_mode == ReflowMode::Normalize
262 || effective_config.reflow_mode == ReflowMode::SentencePerLine
263 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
264 {
265 return Ok(warnings);
266 }
267
268 let lines = ctx.raw_lines();
269
270 // Create a quick lookup set for heading lines
271 // We need this for both the heading skip check AND the paragraphs check
272 let heading_lines_set: std::collections::HashSet<usize> = ctx
273 .lines
274 .iter()
275 .enumerate()
276 .filter(|(_, line)| line.heading.is_some())
277 .map(|(idx, _)| idx + 1)
278 .collect();
279
280 // Use pre-computed table blocks from context
281 // We need this for both the table skip check AND the paragraphs check
282 let table_blocks = &ctx.table_blocks;
283 let mut table_lines_set = std::collections::HashSet::new();
284 for table in table_blocks {
285 table_lines_set.insert(table.header_line + 1);
286 table_lines_set.insert(table.delimiter_line + 1);
287 for &line in &table.content_lines {
288 table_lines_set.insert(line + 1);
289 }
290 }
291
292 // Process candidate lines for line length checks
293 'line_loop: for &line_idx in &candidate_lines {
294 let line_number = line_idx + 1;
295 let line = lines[line_idx];
296
297 // Calculate actual line length (used in warning messages)
298 let effective_length = self.calculate_effective_length(line);
299
300 // Use single line length limit for all content
301 let line_limit = effective_config.line_length.get();
302
303 // In non-strict mode, forgive the trailing non-whitespace run.
304 // If the line only exceeds the limit because of a long token at the end
305 // (URL, link chain, identifier), it passes. This matches markdownlint's
306 // behavior: line.replace(/\S*$/u, "#")
307 let check_length = if effective_config.strict {
308 effective_length
309 } else {
310 match line.rfind(char::is_whitespace) {
311 Some(pos) => {
312 let ws_char = line[pos..].chars().next().unwrap();
313 let prefix_end = pos + ws_char.len_utf8();
314 self.calculate_string_length(&line[..prefix_end]) + 1
315 }
316 None => 1, // No whitespace — entire line is a single token
317 }
318 };
319
320 // Skip lines where the check length is within the limit
321 if check_length <= line_limit {
322 continue;
323 }
324
325 // Semantic link understanding: suppress when excess comes entirely from inline URLs
326 if !effective_config.strict {
327 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
328 if text_only_length <= line_limit {
329 continue;
330 }
331 }
332
333 // Skip mkdocstrings and pymdown blocks (already handled by LintContext)
334 if ctx.lines[line_idx].in_mkdocstrings || ctx.lines[line_idx].in_pymdown_block {
335 continue;
336 }
337
338 // Link reference definitions are always exempt, even in strict mode.
339 // There's no way to shorten them without breaking the URL.
340 // Also check after stripping list markers, since list items may
341 // contain link ref defs as their content.
342 {
343 let trimmed = line.trim();
344 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
345 continue;
346 }
347 if is_list_item(trimmed) {
348 let (_, content) = extract_list_marker_and_content(trimmed);
349 let content_trimmed = content.trim();
350 if content_trimmed.starts_with('[')
351 && content_trimmed.contains("]:")
352 && LINK_REF_PATTERN.is_match(content_trimmed)
353 {
354 continue;
355 }
356 }
357 }
358
359 // Skip various block types efficiently
360 if !effective_config.strict {
361 // Lines whose only content is a link/image are exempt.
362 // After stripping list markers, blockquote markers, and emphasis,
363 // if only a link or image remains, there is no way to shorten it.
364 if is_standalone_link_or_image_line(line) {
365 continue;
366 }
367
368 // Lines consisting entirely of HTML tags are exempt.
369 // Badge lines, images with attributes, and similar inline HTML
370 // are long due to URLs in attributes and can't be meaningfully shortened.
371 if is_html_only_line(line) {
372 continue;
373 }
374
375 // Skip setext heading underlines
376 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
377 continue;
378 }
379
380 // Skip block elements according to config flags
381 // The flags mean: true = check these elements, false = skip these elements
382 // So we skip when the flag is FALSE and the line is in that element type
383 if (!effective_config.headings && heading_lines_set.contains(&line_number))
384 || (!effective_config.code_blocks
385 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
386 || (!effective_config.tables && table_lines_set.contains(&line_number))
387 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
388 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
389 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
390 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
391 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_block)
392 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
393 || ctx.line_info(line_number).is_some_and(|info| info.in_pymdown_block)
394 {
395 continue;
396 }
397
398 // Check if this is a paragraph/regular text line
399 // If paragraphs = false, skip lines that are NOT in special blocks
400 // Blockquote content is treated as paragraph text, so it's not
401 // included in the special blocks list here.
402 if !effective_config.paragraphs {
403 let is_special_block = heading_lines_set.contains(&line_number)
404 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
405 || table_lines_set.contains(&line_number)
406 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
407 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
408 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
409 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
410 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_block)
411 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
412 || ctx
413 .line_info(line_number)
414 .is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container);
415
416 // Skip regular paragraph text when paragraphs = false
417 if !is_special_block {
418 continue;
419 }
420 }
421
422 // Skip blockquote lines when blockquotes = false.
423 // Also skip lazy continuation lines that belong to a blockquote
424 // (lines without `>` prefix that follow a blockquote line).
425 if !effective_config.blockquotes {
426 if ctx.lines[line_number - 1].blockquote.is_some() {
427 continue;
428 }
429 // Check for lazy continuation: scan backwards through
430 // non-blank lines to find if this paragraph started with
431 // a blockquote marker
432 if !line.trim().is_empty() {
433 let mut scan = line_number.saturating_sub(2);
434 loop {
435 if ctx.lines[scan].blockquote.is_some() {
436 // Found a blockquote ancestor — this is a lazy continuation
437 continue 'line_loop;
438 }
439 if lines[scan].trim().is_empty() || scan == 0 {
440 break;
441 }
442 scan -= 1;
443 }
444 }
445 }
446
447 // Skip lines that are only a URL, image ref, or link ref
448 if self.should_ignore_line(line, lines, line_idx, ctx) {
449 continue;
450 }
451 }
452
453 // In sentence-per-line mode, check if this is a single long sentence
454 // If so, emit a warning without a fix (user must manually rephrase)
455 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
456 let sentences = split_into_sentences(line.trim());
457 if sentences.len() == 1 {
458 // Single sentence that's too long - warn but don't auto-fix
459 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
460
461 let (start_line, start_col, end_line, end_col) =
462 calculate_excess_range(line_number, line, line_limit);
463
464 warnings.push(LintWarning {
465 rule_name: Some(self.name().to_string()),
466 message,
467 line: start_line,
468 column: start_col,
469 end_line,
470 end_column: end_col,
471 severity: Severity::Warning,
472 fix: None, // No auto-fix for long single sentences
473 });
474 continue;
475 }
476 // Multiple sentences will be handled by paragraph-based reflow
477 continue;
478 }
479
480 // In semantic-line-breaks mode, skip per-line checks —
481 // all reflow is handled at the paragraph level with cascading splits
482 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
483 continue;
484 }
485
486 // Don't provide fix for individual lines when reflow is enabled
487 // Paragraph-based fixes will be handled separately
488 let fix = None;
489
490 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
491
492 // Calculate precise character range for the excess portion
493 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
494
495 warnings.push(LintWarning {
496 rule_name: Some(self.name().to_string()),
497 message,
498 line: start_line,
499 column: start_col,
500 end_line,
501 end_column: end_col,
502 severity: Severity::Warning,
503 fix,
504 });
505 }
506
507 // If reflow is enabled, generate paragraph-based fixes
508 if effective_config.reflow {
509 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
510 // Merge paragraph warnings with line warnings, removing duplicates
511 for pw in paragraph_warnings {
512 // Remove any line warnings that overlap with this paragraph
513 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
514 warnings.push(pw);
515 }
516 }
517
518 Ok(warnings)
519 }
520
521 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
522 // For CLI usage, apply fixes from warnings
523 // LSP will use the warning-based fixes directly
524 let warnings = self.check(ctx)?;
525 let warnings =
526 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
527
528 // If there are no fixes, return content unchanged
529 if !warnings.iter().any(|w| w.fix.is_some()) {
530 return Ok(ctx.content.to_string());
531 }
532
533 // Apply warning-based fixes
534 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
535 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
536 }
537
    /// Return this rule as a `&dyn Any` reference.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
541
    /// Category this rule is filed under: `RuleCategory::Whitespace`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
545
    /// Whether the rule can be skipped for this document, judged with the rule's own
    /// configuration (inline overrides are not consulted here).
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        self.should_skip_with_config(ctx, &self.config)
    }
549
550 fn default_config_section(&self) -> Option<(String, toml::Value)> {
551 let default_config = MD013Config::default();
552 let json_value = serde_json::to_value(&default_config).ok()?;
553 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
554
555 if let toml::Value::Table(table) = toml_value {
556 if !table.is_empty() {
557 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
558 } else {
559 None
560 }
561 } else {
562 None
563 }
564 }
565
566 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
567 let mut aliases = std::collections::HashMap::new();
568 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
569 aliases.insert("strict_sentences".to_string(), "require-sentence-capital".to_string());
570 aliases.insert("strict-sentences".to_string(), "require-sentence-capital".to_string());
571 Some(aliases)
572 }
573
574 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
575 where
576 Self: Sized,
577 {
578 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
579 // Use global line_length if rule-specific config still has default value
580 if rule_config.line_length.get() == 80 {
581 rule_config.line_length = config.global.line_length;
582 }
583 Box::new(Self::from_config_struct(rule_config))
584 }
585}
586
587impl MD013LineLength {
588 fn is_blockquote_content_boundary(
589 &self,
590 content: &str,
591 line_num: usize,
592 ctx: &crate::lint_context::LintContext,
593 ) -> bool {
594 let trimmed = content.trim();
595
596 trimmed.is_empty()
597 || ctx.line_info(line_num).is_some_and(|info| {
598 info.in_code_block
599 || info.in_front_matter
600 || info.in_html_block
601 || info.in_html_comment
602 || info.in_esm_block
603 || info.in_jsx_expression
604 || info.in_jsx_block
605 || info.in_mdx_comment
606 || info.in_mkdocstrings
607 || info.in_pymdown_block
608 || info.in_mkdocs_container()
609 || info.is_div_marker
610 })
611 || trimmed.starts_with('#')
612 || trimmed.starts_with("```")
613 || trimmed.starts_with("~~~")
614 || trimmed.starts_with('>')
615 || TableUtils::is_potential_table_row(content)
616 || is_list_item(trimmed)
617 || is_horizontal_rule(content)
618 || (trimmed.starts_with('[') && content.contains("]:"))
619 || is_template_directive_only(content)
620 || is_standalone_attr_list(content)
621 || is_snippet_block_delimiter(content)
622 || is_github_alert_marker(trimmed)
623 || is_html_only_line(content)
624 }
625
626 fn generate_blockquote_paragraph_fix(
627 &self,
628 ctx: &crate::lint_context::LintContext,
629 config: &MD013Config,
630 lines: &[&str],
631 line_index: &LineIndex,
632 start_idx: usize,
633 line_ending: &str,
634 ) -> (Option<LintWarning>, usize) {
635 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
636 return (None, start_idx + 1);
637 };
638 let target_level = start_bq.nesting_level;
639
640 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
641 let mut i = start_idx;
642
643 while i < lines.len() {
644 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
645 break;
646 }
647
648 let line_num = i + 1;
649 if line_num > ctx.lines.len() {
650 break;
651 }
652
653 if lines[i].trim().is_empty() {
654 break;
655 }
656
657 let line_bq = ctx.lines[i].blockquote.as_deref();
658 if let Some(bq) = line_bq {
659 if bq.nesting_level != target_level {
660 break;
661 }
662
663 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
664 break;
665 }
666
667 collected.push(CollectedBlockquoteLine {
668 line_idx: i,
669 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
670 });
671 i += 1;
672 continue;
673 }
674
675 let lazy_content = lines[i].trim_start();
676 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
677 break;
678 }
679
680 collected.push(CollectedBlockquoteLine {
681 line_idx: i,
682 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
683 });
684 i += 1;
685 }
686
687 if collected.is_empty() {
688 return (None, start_idx + 1);
689 }
690
691 let next_idx = i;
692 let paragraph_start = collected[0].line_idx;
693 let end_line = collected[collected.len() - 1].line_idx;
694 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
695 let paragraph_text = line_data
696 .iter()
697 .map(|d| d.content.as_str())
698 .collect::<Vec<_>>()
699 .join(" ");
700
701 let contains_definition_list = line_data
702 .iter()
703 .any(|d| crate::utils::is_definition_list_item(&d.content));
704 if contains_definition_list {
705 return (None, next_idx);
706 }
707
708 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
709 if contains_snippets {
710 return (None, next_idx);
711 }
712
713 let needs_reflow = match config.reflow_mode {
714 ReflowMode::Normalize => {
715 self.normalize_mode_needs_reflow(line_data.iter().map(|d| d.content.as_str()), config)
716 }
717 ReflowMode::SentencePerLine => {
718 let sentences = split_into_sentences(¶graph_text);
719 sentences.len() > 1 || line_data.len() > 1
720 }
721 ReflowMode::SemanticLineBreaks => {
722 let sentences = split_into_sentences(¶graph_text);
723 sentences.len() > 1
724 || line_data.len() > 1
725 || collected
726 .iter()
727 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
728 }
729 ReflowMode::Default => collected
730 .iter()
731 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
732 };
733
734 if !needs_reflow {
735 return (None, next_idx);
736 }
737
738 let fallback_prefix = start_bq.prefix.clone();
739 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
740 let continuation_style = blockquote_continuation_style(&line_data);
741
742 let reflow_line_length = if config.line_length.is_unlimited() {
743 usize::MAX
744 } else {
745 config
746 .line_length
747 .get()
748 .saturating_sub(self.calculate_string_length(&explicit_prefix))
749 .max(1)
750 };
751
752 let reflow_options = crate::utils::text_reflow::ReflowOptions {
753 line_length: reflow_line_length,
754 break_on_sentences: true,
755 preserve_breaks: false,
756 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
757 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
758 abbreviations: config.abbreviations_for_reflow(),
759 length_mode: self.reflow_length_mode(),
760 attr_lists: ctx.flavor.supports_attr_lists(),
761 require_sentence_capital: config.require_sentence_capital,
762 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
763 Some(4)
764 } else {
765 None
766 },
767 };
768
769 let reflowed_with_style =
770 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
771
772 if reflowed_with_style.is_empty() {
773 return (None, next_idx);
774 }
775
776 let reflowed_text = reflowed_with_style.join(line_ending);
777
778 let start_range = line_index.whole_line_range(paragraph_start + 1);
779 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
780 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
781 } else {
782 line_index.whole_line_range(end_line + 1)
783 };
784 let byte_range = start_range.start..end_range.end;
785
786 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
787 format!("{reflowed_text}{line_ending}")
788 } else {
789 reflowed_text
790 };
791
792 let original_text = &ctx.content[byte_range.clone()];
793 if original_text == replacement {
794 return (None, next_idx);
795 }
796
797 let (warning_line, warning_end_line) = match config.reflow_mode {
798 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
799 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
800 ReflowMode::Default => {
801 let violating_line = collected
802 .iter()
803 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
804 .map_or(paragraph_start + 1, |line| line.line_idx + 1);
805 (violating_line, violating_line)
806 }
807 };
808
809 let warning = LintWarning {
810 rule_name: Some(self.name().to_string()),
811 message: match config.reflow_mode {
812 ReflowMode::Normalize => format!(
813 "Paragraph could be normalized to use line length of {} characters",
814 config.line_length.get()
815 ),
816 ReflowMode::SentencePerLine => {
817 let num_sentences = split_into_sentences(¶graph_text).len();
818 if line_data.len() == 1 {
819 format!("Line contains {num_sentences} sentences (one sentence per line required)")
820 } else {
821 let num_lines = line_data.len();
822 format!(
823 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
824 )
825 }
826 }
827 ReflowMode::SemanticLineBreaks => {
828 let num_sentences = split_into_sentences(¶graph_text).len();
829 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
830 }
831 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
832 },
833 line: warning_line,
834 column: 1,
835 end_line: warning_end_line,
836 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
837 severity: Severity::Warning,
838 fix: Some(crate::rule::Fix {
839 range: byte_range,
840 replacement,
841 }),
842 };
843
844 (Some(warning), next_idx)
845 }
846
847 /// Generate paragraph-based fixes
848 fn generate_paragraph_fixes(
849 &self,
850 ctx: &crate::lint_context::LintContext,
851 config: &MD013Config,
852 lines: &[&str],
853 ) -> Vec<LintWarning> {
854 let mut warnings = Vec::new();
855 let line_index = LineIndex::new(ctx.content);
856
857 // Detect the content's line ending style to preserve it in replacements.
858 // The LSP receives content from editors which may use CRLF (Windows).
859 // Replacements must match the original line endings to avoid false positives.
860 let line_ending = crate::utils::line_ending::detect_line_ending(ctx.content);
861
862 let mut i = 0;
863 while i < lines.len() {
864 let line_num = i + 1;
865
866 // Handle blockquote paragraphs with style-preserving reflow.
867 // Skip blockquotes when blockquotes=false or paragraphs=false
868 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
869 if !config.blockquotes || !config.paragraphs {
870 // Skip past all blockquote lines (explicit and lazy continuations).
871 // A lazy continuation is a non-blank line without `>` that follows
872 // a blockquote line and isn't a structural element.
873 let mut saw_explicit_bq = false;
874 while i < lines.len() && i < ctx.lines.len() {
875 if ctx.lines[i].blockquote.is_some() {
876 saw_explicit_bq = true;
877 i += 1;
878 } else if saw_explicit_bq
879 && !lines[i].trim().is_empty()
880 && !lines[i].trim_start().starts_with('#')
881 && !lines[i].trim_start().starts_with('>')
882 {
883 // Lazy continuation of preceding blockquote
884 i += 1;
885 } else {
886 break;
887 }
888 }
889 continue;
890 }
891 let (warning, next_idx) =
892 self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i, line_ending);
893 if let Some(warning) = warning {
894 warnings.push(warning);
895 }
896 i = next_idx;
897 continue;
898 }
899
900 // Skip special structures (but NOT MkDocs containers - those get special handling)
901 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
902 info.in_code_block
903 || info.in_front_matter
904 || info.in_html_block
905 || info.in_html_comment
906 || info.in_esm_block
907 || info.in_jsx_expression
908 || info.in_jsx_block
909 || info.in_mdx_comment
910 || info.in_mkdocstrings
911 || info.in_pymdown_block
912 });
913
914 // Skip link reference definitions but NOT footnote definitions.
915 // Footnote definitions (`[^id]: prose`) contain reflowable text,
916 // while link reference definitions (`[ref]: URL`) contain URLs
917 // that cannot be shortened.
918 let is_link_ref_def =
919 lines[i].trim().starts_with('[') && !lines[i].trim().starts_with("[^") && lines[i].contains("]:");
920
921 if should_skip_due_to_line_info
922 || lines[i].trim().starts_with('#')
923 || TableUtils::is_potential_table_row(lines[i])
924 || lines[i].trim().is_empty()
925 || is_horizontal_rule(lines[i])
926 || is_template_directive_only(lines[i])
927 || is_link_ref_def
928 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
929 || is_html_only_line(lines[i])
930 {
931 i += 1;
932 continue;
933 }
934
935 // Handle footnote definitions: `[^id]: prose text that can be reflowed`
936 // Supports multi-paragraph footnotes with code blocks, blockquotes,
937 // tables, and lists preserved verbatim.
938 // Validate structure: must start with `[^`, contain `]:`, and the ID
939 // must not contain `[` or `]` (prevents false matches on nested brackets)
940 if lines[i].trim().starts_with("[^") && lines[i].contains("]:") && {
941 let after_caret = &lines[i].trim()[2..];
942 after_caret
943 .find("]:")
944 .is_some_and(|pos| pos > 0 && !after_caret[..pos].contains(['[', ']']))
945 } {
946 let footnote_start = i;
947 let line = lines[i];
948
949 // Extract the prefix `[^id]:`
950 let Some(colon_pos) = line.find("]:") else {
951 i += 1;
952 continue;
953 };
954 let prefix_end = colon_pos + 2;
955 let prefix = &line[..prefix_end];
956
957 // Content starts after `]: ` (with optional space)
958 let content_start = if line[prefix_end..].starts_with(' ') {
959 prefix_end + 1
960 } else {
961 prefix_end
962 };
963 let first_content = &line[content_start..];
964
965 // CommonMark footnotes use 4-space continuation indent
966 const FN_INDENT: usize = 4;
967
968 // --- Line classification for footnote content ---
969 #[derive(Debug, Clone)]
970 enum FnLineType {
971 Content(String),
972 Verbatim(String, usize), // preserved text, original indent
973 Empty,
974 }
975
976 // Helper: compute visual indent (tabs = 4 spaces)
977 let visual_indent = |s: &str| -> usize {
978 s.chars()
979 .take_while(|c| c.is_whitespace())
980 .map(|c| if c == '\t' { 4 } else { 1 })
981 .sum::<usize>()
982 };
983
984 // Helper: check if a trimmed line is a fence marker (homogeneous chars)
985 let is_fence = |s: &str| -> bool {
986 let t = s.trim();
987 let fence_char = t.chars().next();
988 matches!(fence_char, Some('`') | Some('~'))
989 && t.chars().take_while(|&c| c == fence_char.unwrap()).count() >= 3
990 };
991
992 // Helper: check if a trimmed line is a setext underline
993 let is_setext_underline = |s: &str| -> bool {
994 let t = s.trim();
995 !t.is_empty()
996 && (t.chars().all(|c| c == '=' || c == ' ') || t.chars().all(|c| c == '-' || c == ' '))
997 && t.contains(['=', '-'])
998 };
999
1000 // Deferred body: `[^id]:\n content` — first line has no content,
1001 // actual content starts on the next indented line
1002 let deferred_body = first_content.trim().is_empty();
1003
1004 // Collect all lines belonging to this footnote definition
1005 let mut fn_lines: Vec<FnLineType> = Vec::new();
1006 if !deferred_body {
1007 fn_lines.push(FnLineType::Content(first_content.to_string()));
1008 }
1009 let mut last_consumed = i;
1010 i += 1;
1011
1012 // Strip only the footnote continuation indent, preserving
1013 // internal indentation (e.g., code block body indent)
1014 let strip_fn_indent = |s: &str| -> String {
1015 let mut chars = s.chars();
1016 let mut stripped = 0;
1017 while stripped < FN_INDENT {
1018 match chars.next() {
1019 Some('\t') => stripped += 4,
1020 Some(c) if c.is_whitespace() => stripped += 1,
1021 _ => break,
1022 }
1023 }
1024 chars.as_str().to_string()
1025 };
1026
1027 let mut in_fenced_code = false;
1028 let mut consecutive_blanks = 0u32;
1029
1030 while i < lines.len() {
1031 let next = lines[i];
1032 let next_trimmed = next.trim();
1033
1034 // Blank line handling
1035 if next_trimmed.is_empty() {
1036 consecutive_blanks += 1;
1037 // 2+ consecutive blanks terminate the footnote
1038 if consecutive_blanks >= 2 {
1039 break;
1040 }
1041
1042 // Inside a fenced code block, blank lines are part of the code
1043 if in_fenced_code {
1044 consecutive_blanks = 0; // Don't count blanks inside code blocks
1045 fn_lines.push(FnLineType::Verbatim(String::new(), 0));
1046 last_consumed = i;
1047 i += 1;
1048 continue;
1049 }
1050
1051 // Peek ahead: if next non-blank line is indented >= FN_INDENT,
1052 // this blank is an internal paragraph separator
1053 if i + 1 < lines.len() {
1054 let peek = lines[i + 1];
1055 let peek_indent = visual_indent(peek);
1056 if !peek.trim().is_empty() && peek_indent >= FN_INDENT {
1057 fn_lines.push(FnLineType::Empty);
1058 last_consumed = i;
1059 i += 1;
1060 continue;
1061 }
1062 }
1063 // No valid continuation after blank — end of footnote
1064 break;
1065 }
1066
1067 consecutive_blanks = 0;
1068 let indent = visual_indent(next);
1069
1070 // Not indented enough — end of footnote
1071 if indent < FN_INDENT {
1072 break;
1073 }
1074
1075 // Inside a fenced code block: everything is verbatim until closing fence
1076 if in_fenced_code {
1077 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1078 if is_fence(next_trimmed) {
1079 in_fenced_code = false;
1080 }
1081 last_consumed = i;
1082 i += 1;
1083 continue;
1084 }
1085
1086 // Fence opener — start verbatim code block
1087 if is_fence(next_trimmed) {
1088 in_fenced_code = true;
1089 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1090 last_consumed = i;
1091 i += 1;
1092 continue;
1093 }
1094
1095 // Indented code block: indent >= FN_INDENT + 4 (= 8 spaces)
1096 if indent >= FN_INDENT + 4 {
1097 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1098 last_consumed = i;
1099 i += 1;
1100 continue;
1101 }
1102
1103 // Structural content that must be preserved verbatim
1104 if next_trimmed.starts_with('#')
1105 || is_list_item(next_trimmed)
1106 || next_trimmed.starts_with('>')
1107 || TableUtils::is_potential_table_row(next_trimmed)
1108 || is_setext_underline(next_trimmed)
1109 || is_horizontal_rule(next_trimmed)
1110 || crate::utils::mkdocs_footnotes::is_footnote_definition(next_trimmed)
1111 {
1112 // Preserve verbatim: blockquotes, tables, lists, setext
1113 // underlines, and horizontal rules inside the footnote
1114 if next_trimmed.starts_with('>')
1115 || TableUtils::is_potential_table_row(next_trimmed)
1116 || is_list_item(next_trimmed)
1117 || is_setext_underline(next_trimmed)
1118 || is_horizontal_rule(next_trimmed)
1119 {
1120 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1121 last_consumed = i;
1122 i += 1;
1123 continue;
1124 }
1125 // Headings, new footnote defs, link refs — end the footnote
1126 break;
1127 }
1128
1129 // Link reference definitions inside footnotes are not reflowable
1130 if next_trimmed.starts_with('[')
1131 && !next_trimmed.starts_with("[^")
1132 && next_trimmed.contains("]:")
1133 && LINK_REF_PATTERN.is_match(next_trimmed)
1134 {
1135 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1136 last_consumed = i;
1137 i += 1;
1138 continue;
1139 }
1140
1141 // HTML-only lines inside footnotes are not reflowable
1142 if is_html_only_line(next_trimmed) {
1143 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1144 last_consumed = i;
1145 i += 1;
1146 continue;
1147 }
1148
1149 // Regular prose content
1150 fn_lines.push(FnLineType::Content(next_trimmed.to_string()));
1151 last_consumed = i;
1152 i += 1;
1153 }
1154
1155 // Nothing collected or only empty lines
1156 if fn_lines.iter().all(|l| matches!(l, FnLineType::Empty)) || fn_lines.is_empty() {
1157 continue;
1158 }
1159
1160 // --- Group into blocks ---
1161 #[derive(Debug)]
1162 enum FnBlock {
1163 Paragraph(Vec<String>),
1164 Verbatim(Vec<(String, usize)>), // (content, indent) preserved as-is
1165 }
1166
1167 let mut blocks: Vec<FnBlock> = Vec::new();
1168 let mut current_para: Vec<String> = Vec::new();
1169 let mut current_verbatim: Vec<(String, usize)> = Vec::new();
1170
1171 for fl in &fn_lines {
1172 match fl {
1173 FnLineType::Content(s) => {
1174 if !current_verbatim.is_empty() {
1175 blocks.push(FnBlock::Verbatim(std::mem::take(&mut current_verbatim)));
1176 }
1177 current_para.push(s.clone());
1178 }
1179 FnLineType::Verbatim(s, indent) => {
1180 if !current_para.is_empty() {
1181 blocks.push(FnBlock::Paragraph(std::mem::take(&mut current_para)));
1182 }
1183 current_verbatim.push((s.clone(), *indent));
1184 }
1185 FnLineType::Empty => {
1186 if !current_para.is_empty() {
1187 blocks.push(FnBlock::Paragraph(std::mem::take(&mut current_para)));
1188 }
1189 if !current_verbatim.is_empty() {
1190 blocks.push(FnBlock::Verbatim(std::mem::take(&mut current_verbatim)));
1191 }
1192 }
1193 }
1194 }
1195 if !current_para.is_empty() {
1196 blocks.push(FnBlock::Paragraph(current_para));
1197 }
1198 if !current_verbatim.is_empty() {
1199 blocks.push(FnBlock::Verbatim(current_verbatim));
1200 }
1201
1202 // --- Reflow paragraphs and reconstruct ---
1203 let prefix_display_width = prefix.chars().count() + 1; // +1 for space
1204 let reflow_line_length = if config.line_length.is_unlimited() {
1205 usize::MAX
1206 } else {
1207 config
1208 .line_length
1209 .get()
1210 .saturating_sub(FN_INDENT.max(prefix_display_width))
1211 .max(20)
1212 };
1213 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1214 line_length: reflow_line_length,
1215 break_on_sentences: true,
1216 preserve_breaks: false,
1217 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1218 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1219 abbreviations: config.abbreviations_for_reflow(),
1220 length_mode: self.reflow_length_mode(),
1221 attr_lists: ctx.flavor.supports_attr_lists(),
1222 require_sentence_capital: config.require_sentence_capital,
1223 max_list_continuation_indent: None,
1224 };
1225
1226 let indent_str = " ".repeat(FN_INDENT);
1227 let mut result_lines: Vec<String> = Vec::new();
1228 let mut is_first_block = true;
1229
1230 for block in &blocks {
1231 match block {
1232 FnBlock::Paragraph(para_lines) => {
1233 let paragraph_text = para_lines.join(" ");
1234 let paragraph_text = paragraph_text.trim();
1235 if paragraph_text.is_empty() {
1236 continue;
1237 }
1238
1239 let reflowed = crate::utils::text_reflow::reflow_line(paragraph_text, &reflow_options);
1240 if reflowed.is_empty() {
1241 continue;
1242 }
1243
1244 // Blank line separator between blocks
1245 if !result_lines.is_empty() {
1246 result_lines.push(String::new());
1247 }
1248
1249 for (idx, rline) in reflowed.iter().enumerate() {
1250 if is_first_block && idx == 0 {
1251 result_lines.push(format!("{prefix} {rline}"));
1252 } else {
1253 result_lines.push(format!("{indent_str}{rline}"));
1254 }
1255 }
1256 is_first_block = false;
1257 }
1258 FnBlock::Verbatim(verb_lines) => {
1259 // Blank line separator between blocks
1260 if !result_lines.is_empty() {
1261 result_lines.push(String::new());
1262 }
1263
1264 if is_first_block {
1265 // Verbatim as first block in a deferred-body footnote
1266 if deferred_body {
1267 result_lines.push(prefix.to_string());
1268 }
1269 is_first_block = false;
1270 }
1271 for (content, _orig_indent) in verb_lines {
1272 result_lines.push(format!("{indent_str}{content}"));
1273 }
1274 }
1275 }
1276 }
1277
1278 // If nothing was produced, skip
1279 if result_lines.is_empty() {
1280 continue;
1281 }
1282
1283 let reflowed_text = result_lines.join(line_ending);
1284
1285 // Calculate byte range using last_consumed
1286 let start_range = line_index.whole_line_range(footnote_start + 1);
1287 let end_range = if last_consumed == lines.len() - 1 && !ctx.content.ends_with('\n') {
1288 line_index.line_text_range(last_consumed + 1, 1, lines[last_consumed].len() + 1)
1289 } else {
1290 line_index.whole_line_range(last_consumed + 1)
1291 };
1292 let byte_range = start_range.start..end_range.end;
1293
1294 let replacement = if last_consumed < lines.len() - 1 || ctx.content.ends_with('\n') {
1295 format!("{reflowed_text}{line_ending}")
1296 } else {
1297 reflowed_text
1298 };
1299
1300 let original_text = &ctx.content[byte_range.clone()];
1301 let max_length = (footnote_start..=last_consumed)
1302 .map(|idx| self.calculate_effective_length(lines[idx]))
1303 .max()
1304 .unwrap_or(0);
1305 let line_limit = if config.line_length.is_unlimited() {
1306 usize::MAX
1307 } else {
1308 config.line_length.get()
1309 };
1310 if original_text != replacement && max_length > line_limit {
1311 warnings.push(LintWarning {
1312 rule_name: Some(self.name().to_string()),
1313 message: format!(
1314 "Line length {} exceeds {} characters",
1315 max_length,
1316 config.line_length.get()
1317 ),
1318 line: footnote_start + 1,
1319 column: 1,
1320 end_line: last_consumed + 1,
1321 end_column: lines[last_consumed].len() + 1,
1322 severity: Severity::Warning,
1323 fix: Some(crate::rule::Fix {
1324 range: byte_range,
1325 replacement,
1326 }),
1327 });
1328 }
1329 continue;
1330 }
1331
1332 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
1333 if ctx
1334 .line_info(line_num)
1335 .is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container)
1336 {
1337 // Skip admonition/tab marker lines — only reflow their indented content
1338 let current_line = lines[i];
1339 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
1340 i += 1;
1341 continue;
1342 }
1343
1344 let container_start = i;
1345
1346 // Detect the actual indent level from the first content line
1347 // (supports nested admonitions with 8+ spaces)
1348 let first_line = lines[i];
1349 let base_indent_len = first_line.len() - first_line.trim_start().len();
1350 let base_indent: String = " ".repeat(base_indent_len);
1351
1352 // Collect consecutive MkDocs container paragraph lines
1353 let mut container_lines: Vec<&str> = Vec::new();
1354 while i < lines.len() {
1355 let current_line_num = i + 1;
1356 let line_info = ctx.line_info(current_line_num);
1357
1358 // Stop if we leave the MkDocs container
1359 if !line_info.is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container) {
1360 break;
1361 }
1362
1363 let line = lines[i];
1364
1365 // Stop at paragraph boundaries within the container
1366 if line.trim().is_empty() {
1367 break;
1368 }
1369
1370 // Skip list items, code blocks, headings, HTML-only lines within containers
1371 if is_list_item(line.trim())
1372 || line.trim().starts_with("```")
1373 || line.trim().starts_with("~~~")
1374 || line.trim().starts_with('#')
1375 || is_html_only_line(line)
1376 {
1377 break;
1378 }
1379
1380 container_lines.push(line);
1381 i += 1;
1382 }
1383
1384 if container_lines.is_empty() {
1385 // Must advance i to avoid infinite loop when we encounter
1386 // non-paragraph content (code block, list, heading, empty line)
1387 // at the start of an MkDocs container
1388 i += 1;
1389 continue;
1390 }
1391
1392 // Strip the base indent from each line and join for reflow
1393 let stripped_lines: Vec<&str> = container_lines
1394 .iter()
1395 .map(|line| {
1396 if line.starts_with(&base_indent) {
1397 &line[base_indent_len..]
1398 } else {
1399 line.trim_start()
1400 }
1401 })
1402 .collect();
1403 let paragraph_text = stripped_lines.join(" ");
1404
1405 // Check if reflow is needed
1406 let needs_reflow = match config.reflow_mode {
1407 ReflowMode::Normalize => self.normalize_mode_needs_reflow(container_lines.iter().copied(), config),
1408 ReflowMode::SentencePerLine => {
1409 let sentences = split_into_sentences(¶graph_text);
1410 sentences.len() > 1 || container_lines.len() > 1
1411 }
1412 ReflowMode::SemanticLineBreaks => {
1413 let sentences = split_into_sentences(¶graph_text);
1414 sentences.len() > 1
1415 || container_lines.len() > 1
1416 || container_lines
1417 .iter()
1418 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1419 }
1420 ReflowMode::Default => container_lines
1421 .iter()
1422 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
1423 };
1424
1425 if !needs_reflow {
1426 continue;
1427 }
1428
1429 // Calculate byte range for this container paragraph
1430 let start_range = line_index.whole_line_range(container_start + 1);
1431 let end_line = container_start + container_lines.len() - 1;
1432 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1433 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1434 } else {
1435 line_index.whole_line_range(end_line + 1)
1436 };
1437 let byte_range = start_range.start..end_range.end;
1438
1439 // Reflow with adjusted line length (accounting for the 4-space indent)
1440 let reflow_line_length = if config.line_length.is_unlimited() {
1441 usize::MAX
1442 } else {
1443 config.line_length.get().saturating_sub(base_indent_len).max(1)
1444 };
1445 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1446 line_length: reflow_line_length,
1447 break_on_sentences: true,
1448 preserve_breaks: false,
1449 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1450 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1451 abbreviations: config.abbreviations_for_reflow(),
1452 length_mode: self.reflow_length_mode(),
1453 attr_lists: ctx.flavor.supports_attr_lists(),
1454 require_sentence_capital: config.require_sentence_capital,
1455 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
1456 Some(4)
1457 } else {
1458 None
1459 },
1460 };
1461 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1462
1463 // Re-add the 4-space indent to each reflowed line
1464 let reflowed_with_indent: Vec<String> =
1465 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
1466 let reflowed_text = reflowed_with_indent.join(line_ending);
1467
1468 // Preserve trailing newline
1469 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1470 format!("{reflowed_text}{line_ending}")
1471 } else {
1472 reflowed_text
1473 };
1474
1475 // Only generate a warning if the replacement is different
1476 let original_text = &ctx.content[byte_range.clone()];
1477 if original_text != replacement {
1478 warnings.push(LintWarning {
1479 rule_name: Some(self.name().to_string()),
1480 message: format!(
1481 "Line length {} exceeds {} characters (in MkDocs container)",
1482 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
1483 config.line_length.get()
1484 ),
1485 line: container_start + 1,
1486 column: 1,
1487 end_line: end_line + 1,
1488 end_column: lines[end_line].len() + 1,
1489 severity: Severity::Warning,
1490 fix: Some(crate::rule::Fix {
1491 range: byte_range,
1492 replacement,
1493 }),
1494 });
1495 }
1496 continue;
1497 }
1498
1499 // Helper function to detect semantic line markers
1500 let is_semantic_line = |content: &str| -> bool {
1501 let trimmed = content.trim_start();
1502 let semantic_markers = [
1503 "NOTE:",
1504 "WARNING:",
1505 "IMPORTANT:",
1506 "CAUTION:",
1507 "TIP:",
1508 "DANGER:",
1509 "HINT:",
1510 "INFO:",
1511 ];
1512 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
1513 };
1514
1515 // Helper function to detect fence markers (opening or closing)
1516 let is_fence_marker = |content: &str| -> bool {
1517 let trimmed = content.trim_start();
1518 trimmed.starts_with("```") || trimmed.starts_with("~~~")
1519 };
1520
1521 // Check if this is a list item - handle it specially
1522 let trimmed = lines[i].trim();
1523 if is_list_item(trimmed) {
1524 // Collect the entire list item including continuation lines
1525 let list_start = i;
1526 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
1527 let marker_len = marker.len();
1528
1529 // Checkbox ([ ]/[x]/[X]) is inline content, not part of the list marker.
1530 // Use the base bullet/number marker width for continuation recognition
1531 // so that continuation lines at 2+ spaces are collected for "- [ ] " items.
1532 let base_marker_len = if marker.contains("[ ] ") || marker.contains("[x] ") || marker.contains("[X] ") {
1533 marker.find('[').unwrap_or(marker_len)
1534 } else {
1535 marker_len
1536 };
1537
1538 // MkDocs flavor requires at least 4 spaces for list continuation
1539 // after a blank line (multi-paragraph list items). For non-blank
1540 // continuation (lines directly following the marker line), use
1541 // the natural marker width so that 2-space indent is recognized.
1542 let item_indent = ctx.lines[i].indent;
1543 let min_continuation_indent = if ctx.flavor.requires_strict_list_indent() {
1544 // Use 4-space relative indent from the list item's nesting level
1545 item_indent + (base_marker_len - item_indent).max(4)
1546 } else {
1547 marker_len
1548 };
1549 let content_continuation_indent = base_marker_len;
1550
1551 // Track lines and their types (content, code block, fence, nested list)
1552 #[derive(Clone)]
1553 enum LineType {
1554 Content(String),
1555 CodeBlock(String, usize), // content and original indent
1556 SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
1557 SnippetLine(String), // MkDocs Snippets delimiters (-8<-) that must stay on their own line
1558 DivMarker(String), // Quarto/Pandoc div markers (::: opening or closing)
1559 AdmonitionHeader(String, usize), // header text (e.g. "!!! note") and original indent
1560 AdmonitionContent(String, usize), // body content text and original indent
1561 Empty,
1562 }
1563
1564 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1565 i += 1;
1566
1567 // Collect continuation lines using ctx.lines for metadata
1568 while i < lines.len() {
1569 let line_info = &ctx.lines[i];
1570
1571 // Use pre-computed is_blank from ctx
1572 if line_info.is_blank {
1573 // Empty line - check if next line is indented (part of list item)
1574 if i + 1 < lines.len() {
1575 let next_info = &ctx.lines[i + 1];
1576
1577 // Check if next line is indented enough to be continuation
1578 if !next_info.is_blank && next_info.indent >= min_continuation_indent {
1579 // This blank line is between paragraphs/blocks in the list item
1580 list_item_lines.push(LineType::Empty);
1581 i += 1;
1582 continue;
1583 }
1584 }
1585 // No indented line after blank, end of list item
1586 break;
1587 }
1588
1589 // Use pre-computed indent from ctx
1590 let indent = line_info.indent;
1591
1592 // Valid continuation must be indented at least content_continuation_indent.
1593 // For non-blank continuation, use marker_len (e.g. 2 for "- ").
1594 // MkDocs strict 4-space requirement applies only after blank lines.
1595 if indent >= content_continuation_indent {
1596 let trimmed = line_info.content(ctx.content).trim();
1597
1598 // Check for MkDocs admonition lines inside list items BEFORE
1599 // checking in_code_block. Lines inside code blocks within
1600 // admonitions have both in_admonition and in_code_block set;
1601 // admonition membership takes priority so the entire admonition
1602 // structure (including embedded code blocks) is preserved.
1603 if line_info.in_admonition {
1604 let raw_content = line_info.content(ctx.content);
1605 if mkdocs_admonitions::is_admonition_start(raw_content) {
1606 let header_text = raw_content[indent..].trim_end().to_string();
1607 list_item_lines.push(LineType::AdmonitionHeader(header_text, indent));
1608 } else {
1609 let body_text = raw_content[indent..].trim_end().to_string();
1610 list_item_lines.push(LineType::AdmonitionContent(body_text, indent));
1611 }
1612 i += 1;
1613 continue;
1614 }
1615
1616 // Use pre-computed in_code_block from ctx
1617 if line_info.in_code_block {
1618 list_item_lines.push(LineType::CodeBlock(
1619 line_info.content(ctx.content)[indent..].to_string(),
1620 indent,
1621 ));
1622 i += 1;
1623 continue;
1624 }
1625
1626 // Check if this is a SIBLING list item (breaks parent)
1627 // Nested lists are indented >= marker_len and are PART of the parent item
1628 // Siblings are at indent < marker_len (at or before parent marker)
1629 if is_list_item(trimmed) && indent < marker_len {
1630 // This is a sibling item at same or higher level - end parent item
1631 break;
1632 }
1633
1634 // Nested list items are always processed independently
1635 // by the outer loop, so break when we encounter one.
1636 // If a blank line was collected before this, uncollect it
1637 // so the outer loop preserves the blank between parent and nested.
1638 if is_list_item(trimmed) && indent >= marker_len {
1639 if matches!(list_item_lines.last(), Some(LineType::Empty)) {
1640 list_item_lines.pop();
1641 i -= 1;
1642 }
1643 break;
1644 }
1645
1646 // Normal continuation vs indented code block.
1647 // Use min_continuation_indent for the threshold since
1648 // code blocks start 4 spaces beyond the expected content
1649 // level (which is min_continuation_indent for MkDocs).
1650 if indent <= min_continuation_indent + 3 {
1651 // Extract content (remove indentation and trailing whitespace)
1652 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1653 // See: https://github.com/rvben/rumdl/issues/76
1654 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1655
1656 // Check if this is a div marker (::: opening or closing)
1657 // These must be preserved on their own line, not merged into paragraphs
1658 if line_info.is_div_marker {
1659 list_item_lines.push(LineType::DivMarker(content));
1660 }
1661 // Check if this is a fence marker (opening or closing)
1662 // These should be treated as code block lines, not paragraph content
1663 else if is_fence_marker(&content) {
1664 list_item_lines.push(LineType::CodeBlock(content, indent));
1665 }
1666 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1667 else if is_semantic_line(&content) {
1668 list_item_lines.push(LineType::SemanticLine(content));
1669 }
1670 // Check if this is a snippet block delimiter (-8<- or --8<--)
1671 // These must be preserved on their own lines for MkDocs Snippets extension
1672 else if is_snippet_block_delimiter(&content) {
1673 list_item_lines.push(LineType::SnippetLine(content));
1674 } else {
1675 list_item_lines.push(LineType::Content(content));
1676 }
1677 i += 1;
1678 } else {
1679 // indent >= min_continuation_indent + 4: indented code block
1680 list_item_lines.push(LineType::CodeBlock(
1681 line_info.content(ctx.content)[indent..].to_string(),
1682 indent,
1683 ));
1684 i += 1;
1685 }
1686 } else {
1687 // Not indented enough, end of list item
1688 break;
1689 }
1690 }
1691
1692 // Determine the output continuation indent.
1693 // Normalize/Default modes canonicalize to min_continuation_indent
1694 // (fixing over-indented continuation). Semantic/SentencePerLine
1695 // modes preserve the user's actual indent since they only fix
1696 // line breaking, not indentation.
1697 let indent_size = match config.reflow_mode {
1698 ReflowMode::SemanticLineBreaks | ReflowMode::SentencePerLine => {
1699 // Find indent of the first plain text continuation line,
1700 // skipping the marker line (index 0), nested list items,
1701 // code blocks, and blank lines.
1702 list_item_lines
1703 .iter()
1704 .enumerate()
1705 .skip(1)
1706 .find_map(|(k, lt)| {
1707 if matches!(lt, LineType::Content(_)) {
1708 Some(ctx.lines[list_start + k].indent)
1709 } else {
1710 None
1711 }
1712 })
1713 .unwrap_or(min_continuation_indent)
1714 }
1715 _ => min_continuation_indent,
1716 };
1717 // For checkbox items in mkdocs flavor, enforce minimum indent so
1718 // continuation lines use the structural list indent (4), not the
1719 // content-aligned indent (6) which Python-Markdown doesn't support
1720 let has_checkbox = base_marker_len < marker_len;
1721 let indent_size = if has_checkbox && ctx.flavor.requires_strict_list_indent() {
1722 indent_size.max(min_continuation_indent)
1723 } else {
1724 indent_size
1725 };
1726 let expected_indent = " ".repeat(indent_size);
1727
1728 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
1729 #[derive(Clone)]
1730 enum Block {
1731 Paragraph(Vec<String>),
1732 Code {
1733 lines: Vec<(String, usize)>, // (content, indent) pairs
1734 has_preceding_blank: bool, // Whether there was a blank line before this block
1735 },
1736 SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
1737 SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
1738 DivMarker(String), // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
1739 Html {
1740 lines: Vec<String>, // HTML content preserved exactly as-is
1741 has_preceding_blank: bool, // Whether there was a blank line before this block
1742 },
1743 Admonition {
1744 header: String, // e.g. "!!! note" or "??? warning \"Title\""
1745 header_indent: usize, // original indent of the header line
1746 content_lines: Vec<(String, usize)>, // (text, original_indent) pairs for body lines
1747 },
1748 }
1749
// HTML tag detection helpers
// Block-level HTML tags that should trigger HTML block detection
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];

/// Detects whether `line` begins a block-level HTML construct.
///
/// Returns `Some("!--")` when the trimmed line starts an HTML comment, and
/// `Some(tag)` with the lowercased tag name when it starts with an opening
/// tag listed in `BLOCK_LEVEL_TAGS`. Closing tags (`</…`), declarations
/// (`<!…`), and tags not in the list yield `None`.
///
/// The tag name ends at whitespace, `>`, `/`, or the end of the line, so an
/// opening tag whose attributes continue on the following line (a bare
/// `<div`) is recognised as well.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let trimmed = line.trim();

    // HTML comments are reported under the pseudo tag name "!--"
    if trimmed.starts_with("<!--") {
        return Some("!--".to_string());
    }

    // Anything that is not `<name…` cannot open a block
    let after_bracket = trimmed.strip_prefix('<')?;
    if after_bracket.starts_with('/') || after_bracket.starts_with('!') {
        return None;
    }

    // Extract tag name from <tagname ...>, <tagname>, <tagname/> or a
    // bare <tagname at the end of the line
    let end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let tag_name = after_bracket[..end].to_lowercase();

    // Only treat as block if it's a known block-level tag
    BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()).then_some(tag_name)
}
1819
/// Reports whether `line` closes the HTML construct opened as `tag_name`.
///
/// For the comment pseudo tag `"!--"` the trimmed line must end with `-->`.
/// For every other tag the trimmed line must start with `</`, optionally
/// followed by whitespace, then the tag name compared ASCII
/// case-insensitively. The name must be followed by whitespace, `>`, or the
/// end of the line, so `</pre>` does not close `p` and `</thead>` does not
/// close `th`.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let Some(rest) = trimmed.strip_prefix("</") else {
        return false;
    };
    let rest = rest.trim_start();

    // `get` returns None when the line is too short or the cut would fall
    // inside a multi-byte character, so this slice can never panic.
    let Some(candidate) = rest.get(..tag_name.len()) else {
        return false;
    };
    if !candidate.eq_ignore_ascii_case(tag_name) {
        return false;
    }

    // Require a name boundary so longer tag names are not mistaken for this one
    match rest[tag_name.len()..].chars().next() {
        None => true,
        Some(c) => c.is_whitespace() || c == '>',
    }
}
1833
/// Reports whether the line, ignoring surrounding whitespace, ends with `/>`.
fn is_self_closing_tag(line: &str) -> bool {
    line.trim().strip_suffix("/>").is_some()
}
1838
1839 let mut blocks: Vec<Block> = Vec::new();
1840 let mut current_paragraph: Vec<String> = Vec::new();
1841 let mut current_code_block: Vec<(String, usize)> = Vec::new();
1842 let mut current_html_block: Vec<String> = Vec::new();
1843 let mut html_tag_stack: Vec<String> = Vec::new();
1844 let mut in_code = false;
1845 let mut in_html_block = false;
1846 let mut had_preceding_blank = false; // Track if we just saw an empty line
1847 let mut code_block_has_preceding_blank = false; // Track blank before current code block
1848 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
1849
1850 // Track admonition context for block building
1851 let mut in_admonition_block = false;
1852 let mut admonition_header: Option<(String, usize)> = None; // (header_text, indent)
1853 let mut admonition_content: Vec<(String, usize)> = Vec::new();
1854
1855 // Flush any pending admonition block into `blocks`
1856 let flush_admonition = |blocks: &mut Vec<Block>,
1857 in_admonition: &mut bool,
1858 header: &mut Option<(String, usize)>,
1859 content: &mut Vec<(String, usize)>| {
1860 if *in_admonition {
1861 if let Some((h, hi)) = header.take() {
1862 blocks.push(Block::Admonition {
1863 header: h,
1864 header_indent: hi,
1865 content_lines: std::mem::take(content),
1866 });
1867 }
1868 *in_admonition = false;
1869 }
1870 };
1871
1872 for line in &list_item_lines {
1873 match line {
1874 LineType::Empty => {
1875 if in_admonition_block {
1876 // Blank lines inside admonitions separate paragraphs within the body
1877 admonition_content.push((String::new(), 0));
1878 } else if in_code {
1879 current_code_block.push((String::new(), 0));
1880 } else if in_html_block {
1881 // Allow blank lines inside HTML blocks
1882 current_html_block.push(String::new());
1883 } else if !current_paragraph.is_empty() {
1884 blocks.push(Block::Paragraph(current_paragraph.clone()));
1885 current_paragraph.clear();
1886 }
1887 // Mark that we saw a blank line
1888 had_preceding_blank = true;
1889 }
1890 LineType::Content(content) => {
1891 flush_admonition(
1892 &mut blocks,
1893 &mut in_admonition_block,
1894 &mut admonition_header,
1895 &mut admonition_content,
1896 );
1897 // Check if we're currently in an HTML block
1898 if in_html_block {
1899 current_html_block.push(content.clone());
1900
1901 // Check if this line closes any open HTML tags
1902 if let Some(last_tag) = html_tag_stack.last() {
1903 if is_html_closing_tag(content, last_tag) {
1904 html_tag_stack.pop();
1905
1906 // If stack is empty, HTML block is complete
1907 if html_tag_stack.is_empty() {
1908 blocks.push(Block::Html {
1909 lines: current_html_block.clone(),
1910 has_preceding_blank: html_block_has_preceding_blank,
1911 });
1912 current_html_block.clear();
1913 in_html_block = false;
1914 }
1915 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1916 // Nested opening tag within HTML block
1917 if !is_self_closing_tag(content) {
1918 html_tag_stack.push(new_tag);
1919 }
1920 }
1921 }
1922 had_preceding_blank = false;
1923 } else {
1924 // Not in HTML block - check if this line starts one
1925 if let Some(tag_name) = is_block_html_opening_tag(content) {
1926 // Flush current paragraph before starting HTML block
1927 if in_code {
1928 blocks.push(Block::Code {
1929 lines: current_code_block.clone(),
1930 has_preceding_blank: code_block_has_preceding_blank,
1931 });
1932 current_code_block.clear();
1933 in_code = false;
1934 } else if !current_paragraph.is_empty() {
1935 blocks.push(Block::Paragraph(current_paragraph.clone()));
1936 current_paragraph.clear();
1937 }
1938
1939 // Start new HTML block
1940 in_html_block = true;
1941 html_block_has_preceding_blank = had_preceding_blank;
1942 current_html_block.push(content.clone());
1943
1944 // Check if it's self-closing or needs a closing tag
1945 if is_self_closing_tag(content) {
1946 // Self-closing tag - complete the HTML block immediately
1947 blocks.push(Block::Html {
1948 lines: current_html_block.clone(),
1949 has_preceding_blank: html_block_has_preceding_blank,
1950 });
1951 current_html_block.clear();
1952 in_html_block = false;
1953 } else {
1954 // Regular opening tag - push to stack
1955 html_tag_stack.push(tag_name);
1956 }
1957 } else {
1958 // Regular content line - add to paragraph
1959 if in_code {
1960 // Switching from code to content
1961 blocks.push(Block::Code {
1962 lines: current_code_block.clone(),
1963 has_preceding_blank: code_block_has_preceding_blank,
1964 });
1965 current_code_block.clear();
1966 in_code = false;
1967 }
1968 current_paragraph.push(content.clone());
1969 }
1970 had_preceding_blank = false; // Reset after content
1971 }
1972 }
1973 LineType::CodeBlock(content, indent) => {
1974 flush_admonition(
1975 &mut blocks,
1976 &mut in_admonition_block,
1977 &mut admonition_header,
1978 &mut admonition_content,
1979 );
1980 if in_html_block {
1981 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1982 blocks.push(Block::Html {
1983 lines: current_html_block.clone(),
1984 has_preceding_blank: html_block_has_preceding_blank,
1985 });
1986 current_html_block.clear();
1987 html_tag_stack.clear();
1988 in_html_block = false;
1989 }
1990 if !in_code {
1991 // Switching from content to code
1992 if !current_paragraph.is_empty() {
1993 blocks.push(Block::Paragraph(current_paragraph.clone()));
1994 current_paragraph.clear();
1995 }
1996 in_code = true;
1997 // Record whether there was a blank line before this code block
1998 code_block_has_preceding_blank = had_preceding_blank;
1999 }
2000 current_code_block.push((content.clone(), *indent));
2001 had_preceding_blank = false; // Reset after code
2002 }
2003 LineType::SemanticLine(content) => {
2004 // Semantic lines are standalone - flush any current block and add as separate block
2005 flush_admonition(
2006 &mut blocks,
2007 &mut in_admonition_block,
2008 &mut admonition_header,
2009 &mut admonition_content,
2010 );
2011 if in_code {
2012 blocks.push(Block::Code {
2013 lines: current_code_block.clone(),
2014 has_preceding_blank: code_block_has_preceding_blank,
2015 });
2016 current_code_block.clear();
2017 in_code = false;
2018 } else if in_html_block {
2019 blocks.push(Block::Html {
2020 lines: current_html_block.clone(),
2021 has_preceding_blank: html_block_has_preceding_blank,
2022 });
2023 current_html_block.clear();
2024 html_tag_stack.clear();
2025 in_html_block = false;
2026 } else if !current_paragraph.is_empty() {
2027 blocks.push(Block::Paragraph(current_paragraph.clone()));
2028 current_paragraph.clear();
2029 }
2030 // Add semantic line as its own block
2031 blocks.push(Block::SemanticLine(content.clone()));
2032 had_preceding_blank = false; // Reset after semantic line
2033 }
2034 LineType::SnippetLine(content) => {
2035 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
2036 // Unlike semantic lines, snippet lines don't add extra blank lines around them
2037 flush_admonition(
2038 &mut blocks,
2039 &mut in_admonition_block,
2040 &mut admonition_header,
2041 &mut admonition_content,
2042 );
2043 if in_code {
2044 blocks.push(Block::Code {
2045 lines: current_code_block.clone(),
2046 has_preceding_blank: code_block_has_preceding_blank,
2047 });
2048 current_code_block.clear();
2049 in_code = false;
2050 } else if in_html_block {
2051 blocks.push(Block::Html {
2052 lines: current_html_block.clone(),
2053 has_preceding_blank: html_block_has_preceding_blank,
2054 });
2055 current_html_block.clear();
2056 html_tag_stack.clear();
2057 in_html_block = false;
2058 } else if !current_paragraph.is_empty() {
2059 blocks.push(Block::Paragraph(current_paragraph.clone()));
2060 current_paragraph.clear();
2061 }
2062 // Add snippet line as its own block
2063 blocks.push(Block::SnippetLine(content.clone()));
2064 had_preceding_blank = false;
2065 }
2066 LineType::DivMarker(content) => {
2067 // Div markers (::: opening or closing) are standalone structural delimiters
2068 // Flush any current block and add as separate block
2069 flush_admonition(
2070 &mut blocks,
2071 &mut in_admonition_block,
2072 &mut admonition_header,
2073 &mut admonition_content,
2074 );
2075 if in_code {
2076 blocks.push(Block::Code {
2077 lines: current_code_block.clone(),
2078 has_preceding_blank: code_block_has_preceding_blank,
2079 });
2080 current_code_block.clear();
2081 in_code = false;
2082 } else if in_html_block {
2083 blocks.push(Block::Html {
2084 lines: current_html_block.clone(),
2085 has_preceding_blank: html_block_has_preceding_blank,
2086 });
2087 current_html_block.clear();
2088 html_tag_stack.clear();
2089 in_html_block = false;
2090 } else if !current_paragraph.is_empty() {
2091 blocks.push(Block::Paragraph(current_paragraph.clone()));
2092 current_paragraph.clear();
2093 }
2094 blocks.push(Block::DivMarker(content.clone()));
2095 had_preceding_blank = false;
2096 }
2097 LineType::AdmonitionHeader(header_text, indent) => {
2098 flush_admonition(
2099 &mut blocks,
2100 &mut in_admonition_block,
2101 &mut admonition_header,
2102 &mut admonition_content,
2103 );
2104 // Flush other current blocks
2105 if in_code {
2106 blocks.push(Block::Code {
2107 lines: current_code_block.clone(),
2108 has_preceding_blank: code_block_has_preceding_blank,
2109 });
2110 current_code_block.clear();
2111 in_code = false;
2112 } else if in_html_block {
2113 blocks.push(Block::Html {
2114 lines: current_html_block.clone(),
2115 has_preceding_blank: html_block_has_preceding_blank,
2116 });
2117 current_html_block.clear();
2118 html_tag_stack.clear();
2119 in_html_block = false;
2120 } else if !current_paragraph.is_empty() {
2121 blocks.push(Block::Paragraph(current_paragraph.clone()));
2122 current_paragraph.clear();
2123 }
2124 // Start new admonition block
2125 in_admonition_block = true;
2126 admonition_header = Some((header_text.clone(), *indent));
2127 admonition_content.clear();
2128 had_preceding_blank = false;
2129 }
2130 LineType::AdmonitionContent(content, indent) => {
2131 if in_admonition_block {
2132 // Add to current admonition body
2133 admonition_content.push((content.clone(), *indent));
2134 } else {
2135 // Admonition content without a header should not happen,
2136 // but treat it as regular content to avoid data loss
2137 current_paragraph.push(content.clone());
2138 }
2139 had_preceding_blank = false;
2140 }
2141 }
2142 }
2143
2144 // Push all remaining pending blocks independently
2145 flush_admonition(
2146 &mut blocks,
2147 &mut in_admonition_block,
2148 &mut admonition_header,
2149 &mut admonition_content,
2150 );
2151 if in_code && !current_code_block.is_empty() {
2152 blocks.push(Block::Code {
2153 lines: current_code_block,
2154 has_preceding_blank: code_block_has_preceding_blank,
2155 });
2156 }
2157 if in_html_block && !current_html_block.is_empty() {
2158 blocks.push(Block::Html {
2159 lines: current_html_block,
2160 has_preceding_blank: html_block_has_preceding_blank,
2161 });
2162 }
2163 if !current_paragraph.is_empty() {
2164 blocks.push(Block::Paragraph(current_paragraph));
2165 }
2166
2167 // Helper: check if a line (raw source or stripped content) is exempt
2168 // from line-length checks. Link reference definitions are always exempt;
2169 // standalone link/image lines are exempt when strict mode is off.
2170 // Also checks content after stripping list markers, since list item
2171 // continuation lines may contain link ref defs.
2172 let is_exempt_line = |raw_line: &str| -> bool {
2173 let trimmed = raw_line.trim();
2174 // Link reference definitions: always exempt
2175 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
2176 return true;
2177 }
2178 // Also check after stripping list markers (for list item content)
2179 if is_list_item(trimmed) {
2180 let (_, content) = extract_list_marker_and_content(trimmed);
2181 let content_trimmed = content.trim();
2182 if content_trimmed.starts_with('[')
2183 && content_trimmed.contains("]:")
2184 && LINK_REF_PATTERN.is_match(content_trimmed)
2185 {
2186 return true;
2187 }
2188 }
2189 // Standalone link/image lines: exempt when not strict
2190 if !config.strict && is_standalone_link_or_image_line(raw_line) {
2191 return true;
2192 }
2193 // HTML-only lines: exempt when not strict
2194 if !config.strict && is_html_only_line(raw_line) {
2195 return true;
2196 }
2197 false
2198 };
2199
2200 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
2201 // Exclude link reference definitions and standalone link lines from content
2202 // so they don't pollute combined_content or trigger false reflow.
2203 let content_lines: Vec<String> = list_item_lines
2204 .iter()
2205 .filter_map(|line| {
2206 if let LineType::Content(s) = line {
2207 if is_exempt_line(s) {
2208 return None;
2209 }
2210 Some(s.clone())
2211 } else {
2212 None
2213 }
2214 })
2215 .collect();
2216
2217 // Check if we need to reflow this list item
2218 // We check the combined content to see if it exceeds length limits
2219 let combined_content = content_lines.join(" ").trim().to_string();
2220
2221 // Helper to check if we should reflow in normalize mode
2222 let should_normalize = || {
2223 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
2224 // DO normalize if it has plain text content that spans multiple lines
2225 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
2226 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
2227 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
2228 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
2229 let has_admonitions = blocks.iter().any(|b| matches!(b, Block::Admonition { .. }));
2230 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
2231
2232 // If we have structural blocks but no paragraphs, don't normalize
2233 if (has_code_blocks
2234 || has_semantic_lines
2235 || has_snippet_lines
2236 || has_div_markers
2237 || has_admonitions)
2238 && !has_paragraphs
2239 {
2240 return false;
2241 }
2242
2243 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
2244 if has_paragraphs {
2245 // Count only paragraphs that contain at least one non-exempt line.
2246 // Paragraphs consisting entirely of link ref defs or standalone links
2247 // should not trigger normalization.
2248 let paragraph_count = blocks
2249 .iter()
2250 .filter(|b| {
2251 if let Block::Paragraph(para_lines) = b {
2252 !para_lines.iter().all(|line| is_exempt_line(line))
2253 } else {
2254 false
2255 }
2256 })
2257 .count();
2258 if paragraph_count > 1 {
2259 // Multiple non-exempt paragraph blocks should be normalized
2260 return true;
2261 }
2262
2263 // Single paragraph block: normalize if it has multiple content lines
2264 if content_lines.len() > 1 {
2265 return true;
2266 }
2267 }
2268
2269 false
2270 };
2271
2272 let needs_reflow = match config.reflow_mode {
2273 ReflowMode::Normalize => {
2274 // Only reflow if:
2275 // 1. Any non-exempt paragraph, when joined, exceeds the limit, OR
2276 // 2. Any admonition content line exceeds the limit, OR
2277 // 3. The list item should be normalized (has multi-line plain text)
2278 let any_paragraph_exceeds = blocks.iter().any(|block| match block {
2279 Block::Paragraph(para_lines) => {
2280 if para_lines.iter().all(|line| is_exempt_line(line)) {
2281 return false;
2282 }
2283 let joined = para_lines.join(" ");
2284 let with_marker = format!("{}{}", " ".repeat(indent_size), joined.trim());
2285 self.calculate_effective_length(&with_marker) > config.line_length.get()
2286 }
2287 Block::Admonition {
2288 content_lines,
2289 header_indent,
2290 ..
2291 } => content_lines.iter().any(|(content, indent)| {
2292 if content.is_empty() {
2293 return false;
2294 }
2295 let with_indent = format!("{}{}", " ".repeat(*indent.max(header_indent)), content);
2296 self.calculate_effective_length(&with_indent) > config.line_length.get()
2297 }),
2298 _ => false,
2299 });
2300 if any_paragraph_exceeds {
2301 true
2302 } else {
2303 should_normalize()
2304 }
2305 }
2306 ReflowMode::SentencePerLine => {
2307 // Check if list item has multiple sentences
2308 let sentences = split_into_sentences(&combined_content);
2309 sentences.len() > 1
2310 }
2311 ReflowMode::SemanticLineBreaks => {
2312 let sentences = split_into_sentences(&combined_content);
2313 sentences.len() > 1
2314 || (list_start..i).any(|line_idx| {
2315 let line = lines[line_idx];
2316 let trimmed = line.trim();
2317 if trimmed.is_empty() || is_exempt_line(line) {
2318 return false;
2319 }
2320 self.calculate_effective_length(line) > config.line_length.get()
2321 })
2322 }
2323 ReflowMode::Default => {
2324 // In default mode, only reflow if any individual non-exempt line exceeds limit
2325 (list_start..i).any(|line_idx| {
2326 let line = lines[line_idx];
2327 let trimmed = line.trim();
2328 // Skip blank lines and exempt lines
2329 if trimmed.is_empty() || is_exempt_line(line) {
2330 return false;
2331 }
2332 self.calculate_effective_length(line) > config.line_length.get()
2333 })
2334 }
2335 };
2336
2337 if needs_reflow {
2338 let start_range = line_index.whole_line_range(list_start + 1);
2339 let end_line = i - 1;
2340 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2341 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2342 } else {
2343 line_index.whole_line_range(end_line + 1)
2344 };
2345 let byte_range = start_range.start..end_range.end;
2346
2347 // Reflow each block (paragraphs only, preserve code blocks)
2348 // When line_length = 0 (no limit), use a very large value for reflow
2349 let reflow_line_length = if config.line_length.is_unlimited() {
2350 usize::MAX
2351 } else {
2352 config.line_length.get().saturating_sub(indent_size).max(1)
2353 };
2354 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2355 line_length: reflow_line_length,
2356 break_on_sentences: true,
2357 preserve_breaks: false,
2358 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2359 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2360 abbreviations: config.abbreviations_for_reflow(),
2361 length_mode: self.reflow_length_mode(),
2362 attr_lists: ctx.flavor.supports_attr_lists(),
2363 require_sentence_capital: config.require_sentence_capital,
2364 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2365 Some(4)
2366 } else {
2367 None
2368 },
2369 };
2370
2371 let mut result: Vec<String> = Vec::new();
2372 let mut is_first_block = true;
2373
2374 for (block_idx, block) in blocks.iter().enumerate() {
2375 match block {
2376 Block::Paragraph(para_lines) => {
2377 // If every line in this paragraph is exempt (link ref defs,
2378 // standalone links), preserve the paragraph verbatim instead
2379 // of reflowing it. Reflowing would corrupt link ref defs.
2380 let all_exempt = para_lines.iter().all(|line| is_exempt_line(line));
2381
2382 if all_exempt {
2383 for (idx, line) in para_lines.iter().enumerate() {
2384 if is_first_block && idx == 0 {
2385 result.push(format!("{marker}{line}"));
2386 is_first_block = false;
2387 } else {
2388 result.push(format!("{expected_indent}{line}"));
2389 }
2390 }
2391 } else {
2392 // Split the paragraph into segments at hard break boundaries
2393 // Each segment can be reflowed independently
2394 let segments = split_into_segments(para_lines);
2395
2396 for (segment_idx, segment) in segments.iter().enumerate() {
2397 // Check if this segment ends with a hard break and what type
2398 let hard_break_type = segment.last().and_then(|line| {
2399 let line = line.strip_suffix('\r').unwrap_or(line);
2400 if line.ends_with('\\') {
2401 Some("\\")
2402 } else if line.ends_with(" ") {
2403 Some(" ")
2404 } else {
2405 None
2406 }
2407 });
2408
2409 // Join and reflow the segment (removing the hard break marker for processing)
2410 let segment_for_reflow: Vec<String> = segment
2411 .iter()
2412 .map(|line| {
2413 // Strip hard break marker (2 spaces or backslash) for reflow processing
2414 if line.ends_with('\\') {
2415 line[..line.len() - 1].trim_end().to_string()
2416 } else if line.ends_with(" ") {
2417 line[..line.len() - 2].trim_end().to_string()
2418 } else {
2419 line.clone()
2420 }
2421 })
2422 .collect();
2423
2424 let segment_text = segment_for_reflow.join(" ").trim().to_string();
2425 if !segment_text.is_empty() {
2426 let reflowed =
2427 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
2428
2429 if is_first_block && segment_idx == 0 {
2430 // First segment of first block starts with marker
2431 result.push(format!("{marker}{}", reflowed[0]));
2432 for line in reflowed.iter().skip(1) {
2433 result.push(format!("{expected_indent}{line}"));
2434 }
2435 is_first_block = false;
2436 } else {
2437 // Subsequent segments
2438 for line in reflowed {
2439 result.push(format!("{expected_indent}{line}"));
2440 }
2441 }
2442
2443 // If this segment had a hard break, add it back to the last line
2444 // Preserve the original hard break format (backslash or two spaces)
2445 if let Some(break_marker) = hard_break_type
2446 && let Some(last_line) = result.last_mut()
2447 {
2448 last_line.push_str(break_marker);
2449 }
2450 }
2451 }
2452 }
2453
2454 // Add blank line after paragraph block if there's a next block.
2455 // Check if next block is a code block that doesn't want a preceding blank.
2456 // Also don't add blank lines before snippet lines (they should stay tight).
2457 // Only add if not already ending with one (avoids double blanks).
2458 if block_idx < blocks.len() - 1 {
2459 let next_block = &blocks[block_idx + 1];
2460 let should_add_blank = match next_block {
2461 Block::Code {
2462 has_preceding_blank, ..
2463 } => *has_preceding_blank,
2464 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2465 _ => true, // For all other blocks, add blank line
2466 };
2467 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2468 result.push(String::new());
2469 }
2470 }
2471 }
2472 Block::Code {
2473 lines: code_lines,
2474 has_preceding_blank: _,
2475 } => {
2476 // Preserve code blocks as-is with original indentation
2477 // NOTE: Blank line before code block is handled by the previous block
2478 // (see paragraph block's logic above)
2479
2480 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
2481 if is_first_block && idx == 0 {
2482 // First line of first block gets marker
2483 result.push(format!(
2484 "{marker}{}",
2485 " ".repeat(orig_indent - marker_len) + content
2486 ));
2487 is_first_block = false;
2488 } else if content.is_empty() {
2489 result.push(String::new());
2490 } else {
2491 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
2492 }
2493 }
2494 }
2495 Block::SemanticLine(content) => {
2496 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
2497 // Only add blank before if not already ending with one.
2498 if !is_first_block && result.last().is_none_or(|s: &String| !s.is_empty()) {
2499 result.push(String::new());
2500 }
2501
2502 if is_first_block {
2503 // First block starts with marker
2504 result.push(format!("{marker}{content}"));
2505 is_first_block = false;
2506 } else {
2507 // Subsequent blocks use expected indent
2508 result.push(format!("{expected_indent}{content}"));
2509 }
2510
2511 // Add blank line after semantic line if there's a next block.
2512 // Only add if not already ending with one.
2513 if block_idx < blocks.len() - 1 {
2514 let next_block = &blocks[block_idx + 1];
2515 let should_add_blank = match next_block {
2516 Block::Code {
2517 has_preceding_blank, ..
2518 } => *has_preceding_blank,
2519 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2520 _ => true, // For all other blocks, add blank line
2521 };
2522 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2523 result.push(String::new());
2524 }
2525 }
2526 }
2527 Block::SnippetLine(content) => {
2528 // Preserve snippet delimiters (-8<-) as-is on their own line
2529 // Unlike semantic lines, snippet lines don't add extra blank lines
2530 if is_first_block {
2531 // First block starts with marker
2532 result.push(format!("{marker}{content}"));
2533 is_first_block = false;
2534 } else {
2535 // Subsequent blocks use expected indent
2536 result.push(format!("{expected_indent}{content}"));
2537 }
2538 // No blank lines added before or after snippet delimiters
2539 }
2540 Block::DivMarker(content) => {
2541 // Preserve div markers (::: opening or closing) as-is on their own line
2542 if is_first_block {
2543 result.push(format!("{marker}{content}"));
2544 is_first_block = false;
2545 } else {
2546 result.push(format!("{expected_indent}{content}"));
2547 }
2548 }
2549 Block::Html {
2550 lines: html_lines,
2551 has_preceding_blank: _,
2552 } => {
2553 // Preserve HTML blocks exactly as-is with original indentation
2554 // NOTE: Blank line before HTML block is handled by the previous block
2555
2556 for (idx, line) in html_lines.iter().enumerate() {
2557 if is_first_block && idx == 0 {
2558 // First line of first block gets marker
2559 result.push(format!("{marker}{line}"));
2560 is_first_block = false;
2561 } else if line.is_empty() {
2562 // Preserve blank lines inside HTML blocks
2563 result.push(String::new());
2564 } else {
2565 // Preserve lines with their original content (already includes indentation)
2566 result.push(format!("{expected_indent}{line}"));
2567 }
2568 }
2569
2570 // Add blank line after HTML block if there's a next block.
2571 // Only add if not already ending with one (avoids double blanks
2572 // when the HTML block itself contained a trailing blank line).
2573 if block_idx < blocks.len() - 1 {
2574 let next_block = &blocks[block_idx + 1];
2575 let should_add_blank = match next_block {
2576 Block::Code {
2577 has_preceding_blank, ..
2578 } => *has_preceding_blank,
2579 Block::Html {
2580 has_preceding_blank, ..
2581 } => *has_preceding_blank,
2582 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2583 _ => true, // For all other blocks, add blank line
2584 };
2585 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2586 result.push(String::new());
2587 }
2588 }
2589 }
2590 Block::Admonition {
2591 header,
2592 header_indent,
2593 content_lines: admon_lines,
2594 } => {
2595 // Reconstruct admonition block with header at original indent
2596 // and body content reflowed to fit within the line length limit
2597
2598 // Add blank line before admonition if not first block
2599 if !is_first_block && result.last().is_none_or(|s: &String| !s.is_empty()) {
2600 result.push(String::new());
2601 }
2602
2603 // Output the header at its original indent
2604 let header_indent_str = " ".repeat(*header_indent);
2605 if is_first_block {
2606 result.push(format!(
2607 "{marker}{}",
2608 " ".repeat(header_indent.saturating_sub(marker_len)) + header
2609 ));
2610 is_first_block = false;
2611 } else {
2612 result.push(format!("{header_indent_str}{header}"));
2613 }
2614
2615 // Derive body indent from the first non-empty content line's
2616 // stored indent, falling back to header_indent + 4 for
2617 // empty-body admonitions
2618 let body_indent = admon_lines
2619 .iter()
2620 .find(|(content, _)| !content.is_empty())
2621 .map_or(header_indent + 4, |(_, indent)| *indent);
2622 let body_indent_str = " ".repeat(body_indent);
2623
2624 // Segment body content into code blocks (verbatim) and
2625 // text paragraphs (reflowable), separated by blank lines.
2626 // Code lines store (content, orig_indent) to reconstruct
2627 // internal indentation relative to body_indent.
2628 enum AdmonSegment {
2629 Text(Vec<String>),
2630 Code(Vec<(String, usize)>),
2631 }
2632
2633 let mut segments: Vec<AdmonSegment> = Vec::new();
2634 let mut current_text: Vec<String> = Vec::new();
2635 let mut current_code: Vec<(String, usize)> = Vec::new();
2636 let mut in_admon_code = false;
2637 // Track the opening fence character so closing fences
2638 // must match (backticks close backticks, tildes close tildes)
2639 let mut fence_char: char = '`';
2640
2641 // Opening fences: ``` or ~~~ followed by optional info string
2642 let get_opening_fence = |s: &str| -> Option<(char, usize)> {
2643 let t = s.trim_start();
2644 if t.starts_with("```") {
2645 Some(('`', t.bytes().take_while(|&b| b == b'`').count()))
2646 } else if t.starts_with("~~~") {
2647 Some(('~', t.bytes().take_while(|&b| b == b'~').count()))
2648 } else {
2649 None
2650 }
2651 };
2652 // Closing fences: ONLY fence chars + optional trailing spaces
2653 let get_closing_fence = |s: &str| -> Option<(char, usize)> {
2654 let t = s.trim();
2655 if t.starts_with("```") && t.bytes().all(|b| b == b'`') {
2656 Some(('`', t.len()))
2657 } else if t.starts_with("~~~") && t.bytes().all(|b| b == b'~') {
2658 Some(('~', t.len()))
2659 } else {
2660 None
2661 }
2662 };
2663 let mut fence_len: usize = 3;
2664
2665 for (content, orig_indent) in admon_lines {
2666 if in_admon_code {
2667 // Closing fence must use the same character, be
2668 // at least as long, and have no info string
2669 if let Some((ch, len)) = get_closing_fence(content)
2670 && ch == fence_char
2671 && len >= fence_len
2672 {
2673 current_code.push((content.clone(), *orig_indent));
2674 in_admon_code = false;
2675 segments.push(AdmonSegment::Code(std::mem::take(&mut current_code)));
2676 continue;
2677 }
2678 current_code.push((content.clone(), *orig_indent));
2679 } else if let Some((ch, len)) = get_opening_fence(content) {
2680 if !current_text.is_empty() {
2681 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2682 }
2683 in_admon_code = true;
2684 fence_char = ch;
2685 fence_len = len;
2686 current_code.push((content.clone(), *orig_indent));
2687 } else if content.is_empty() {
2688 if !current_text.is_empty() {
2689 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2690 }
2691 } else {
2692 current_text.push(content.clone());
2693 }
2694 }
2695 if in_admon_code && !current_code.is_empty() {
2696 segments.push(AdmonSegment::Code(std::mem::take(&mut current_code)));
2697 }
2698 if !current_text.is_empty() {
2699 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2700 }
2701
2702 // Build reflow options once for all text segments
2703 let admon_reflow_length = if config.line_length.is_unlimited() {
2704 usize::MAX
2705 } else {
2706 config.line_length.get().saturating_sub(body_indent).max(1)
2707 };
2708
2709 let admon_reflow_options = crate::utils::text_reflow::ReflowOptions {
2710 line_length: admon_reflow_length,
2711 break_on_sentences: true,
2712 preserve_breaks: false,
2713 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2714 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2715 abbreviations: config.abbreviations_for_reflow(),
2716 length_mode: self.reflow_length_mode(),
2717 attr_lists: ctx.flavor.supports_attr_lists(),
2718 require_sentence_capital: config.require_sentence_capital,
2719 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2720 Some(4)
2721 } else {
2722 None
2723 },
2724 };
2725
2726 // Output each segment
2727 for segment in &segments {
2728 // Blank line before each segment (after the header or previous segment)
2729 result.push(String::new());
2730
2731 match segment {
2732 AdmonSegment::Code(lines) => {
2733 for (line, orig_indent) in lines {
2734 if line.is_empty() {
2735 // Preserve blank lines inside code blocks
2736 result.push(String::new());
2737 } else {
2738 // Reconstruct with body_indent + any extra
2739 // indentation the line had beyond body_indent
2740 let extra = orig_indent.saturating_sub(body_indent);
2741 let indent_str = " ".repeat(body_indent + extra);
2742 result.push(format!("{indent_str}{line}"));
2743 }
2744 }
2745 }
2746 AdmonSegment::Text(lines) => {
2747 let paragraph_text = lines.join(" ").trim().to_string();
2748 if paragraph_text.is_empty() {
2749 continue;
2750 }
2751 let reflowed = crate::utils::text_reflow::reflow_line(
2752 ¶graph_text,
2753 &admon_reflow_options,
2754 );
2755 for line in &reflowed {
2756 result.push(format!("{body_indent_str}{line}"));
2757 }
2758 }
2759 }
2760 }
2761
2762 // Add blank line after admonition if there's a next block
2763 if block_idx < blocks.len() - 1 {
2764 let next_block = &blocks[block_idx + 1];
2765 let should_add_blank = match next_block {
2766 Block::Code {
2767 has_preceding_blank, ..
2768 } => *has_preceding_blank,
2769 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2770 _ => true,
2771 };
2772 if should_add_blank && result.last().is_none_or(|s: &String| !s.is_empty()) {
2773 result.push(String::new());
2774 }
2775 }
2776 }
2777 }
2778 }
2779
2780 let reflowed_text = result.join(line_ending);
2781
2782 // Preserve trailing newline
2783 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2784 format!("{reflowed_text}{line_ending}")
2785 } else {
2786 reflowed_text
2787 };
2788
2789 // Get the original text to compare
2790 let original_text = &ctx.content[byte_range.clone()];
2791
2792 // Only generate a warning if the replacement is different from the original
2793 if original_text != replacement {
2794 // Generate an appropriate message based on why reflow is needed
2795 let message = match config.reflow_mode {
2796 ReflowMode::SentencePerLine => {
2797 let num_sentences = split_into_sentences(&combined_content).len();
2798 let num_lines = content_lines.len();
2799 if num_lines == 1 {
2800 // Single line with multiple sentences
2801 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2802 } else {
2803 // Multiple lines - could be split sentences or mixed
2804 format!(
2805 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
2806 )
2807 }
2808 }
2809 ReflowMode::SemanticLineBreaks => {
2810 let num_sentences = split_into_sentences(&combined_content).len();
2811 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
2812 }
2813 ReflowMode::Normalize => {
2814 // Find the longest non-exempt paragraph when joined
2815 let max_para_length = blocks
2816 .iter()
2817 .filter_map(|block| {
2818 if let Block::Paragraph(para_lines) = block {
2819 if para_lines.iter().all(|line| is_exempt_line(line)) {
2820 return None;
2821 }
2822 let joined = para_lines.join(" ");
2823 let with_indent = format!("{}{}", " ".repeat(indent_size), joined.trim());
2824 Some(self.calculate_effective_length(&with_indent))
2825 } else {
2826 None
2827 }
2828 })
2829 .max()
2830 .unwrap_or(0);
2831 if max_para_length > config.line_length.get() {
2832 format!(
2833 "Line length {} exceeds {} characters",
2834 max_para_length,
2835 config.line_length.get()
2836 )
2837 } else {
2838 "Multi-line content can be normalized".to_string()
2839 }
2840 }
2841 ReflowMode::Default => {
2842 // Report the actual longest non-exempt line, not the combined content
2843 let max_length = (list_start..i)
2844 .filter(|&line_idx| {
2845 let line = lines[line_idx];
2846 let trimmed = line.trim();
2847 !trimmed.is_empty() && !is_exempt_line(line)
2848 })
2849 .map(|line_idx| self.calculate_effective_length(lines[line_idx]))
2850 .max()
2851 .unwrap_or(0);
2852 format!(
2853 "Line length {} exceeds {} characters",
2854 max_length,
2855 config.line_length.get()
2856 )
2857 }
2858 };
2859
2860 warnings.push(LintWarning {
2861 rule_name: Some(self.name().to_string()),
2862 message,
2863 line: list_start + 1,
2864 column: 1,
2865 end_line: end_line + 1,
2866 end_column: lines[end_line].len() + 1,
2867 severity: Severity::Warning,
2868 fix: Some(crate::rule::Fix {
2869 range: byte_range,
2870 replacement,
2871 }),
2872 });
2873 }
2874 }
2875 continue;
2876 }
2877
2878 // Found start of a paragraph - collect all lines in it
2879 let paragraph_start = i;
2880 let mut paragraph_lines = vec![lines[i]];
2881 i += 1;
2882
2883 while i < lines.len() {
2884 let next_line = lines[i];
2885 let next_line_num = i + 1;
2886 let next_trimmed = next_line.trim();
2887
2888 // Stop at paragraph boundaries
2889 if next_trimmed.is_empty()
2890 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
2891 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
2892 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
2893 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
2894 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
2895 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
2896 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_block)
2897 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
2898 || ctx
2899 .line_info(next_line_num)
2900 .is_some_and(super::super::lint_context::types::LineInfo::in_mkdocs_container)
2901 || (next_line_num > 0
2902 && next_line_num <= ctx.lines.len()
2903 && ctx.lines[next_line_num - 1].blockquote.is_some())
2904 || next_trimmed.starts_with('#')
2905 || TableUtils::is_potential_table_row(next_line)
2906 || is_list_item(next_trimmed)
2907 || is_horizontal_rule(next_line)
2908 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
2909 || is_template_directive_only(next_line)
2910 || is_standalone_attr_list(next_line)
2911 || is_snippet_block_delimiter(next_line)
2912 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
2913 || is_html_only_line(next_line)
2914 {
2915 break;
2916 }
2917
2918 // Check if the previous line ends with a hard break (2+ spaces or backslash)
2919 if i > 0 && has_hard_break(lines[i - 1]) {
2920 // Don't include lines after hard breaks in the same paragraph
2921 break;
2922 }
2923
2924 paragraph_lines.push(next_line);
2925 i += 1;
2926 }
2927
2928 // Compute the common leading indent of all non-empty paragraph lines,
2929 // but only when those lines are structurally inside a list block.
2930 // Indented continuation lines that follow a nested list arrive here
2931 // with their structural indentation intact (e.g. 2 spaces for a
2932 // top-level list item). Stripping the indent before reflow and
2933 // re-applying it afterward prevents the fixer from moving those
2934 // lines to column 0.
2935 //
2936 // The list-block guard is essential: top-level paragraphs that happen
2937 // to start with spaces (insignificant in Markdown) must NOT have those
2938 // spaces preserved or injected by the fixer.
2939 let common_indent: String = if ctx.is_in_list_block(paragraph_start + 1) {
2940 let min_len = paragraph_lines
2941 .iter()
2942 .filter(|l| !l.trim().is_empty())
2943 .map(|l| l.len() - l.trim_start().len())
2944 .min()
2945 .unwrap_or(0);
2946 paragraph_lines
2947 .iter()
2948 .find(|l| !l.trim().is_empty())
2949 .map(|l| l[..min_len].to_string())
2950 .unwrap_or_default()
2951 } else {
2952 String::new()
2953 };
2954
2955 // Combine paragraph lines into a single string for processing.
2956 // This must be done BEFORE the needs_reflow check for sentence-per-line mode.
2957 let paragraph_text = if common_indent.is_empty() {
2958 paragraph_lines.join(" ")
2959 } else {
2960 paragraph_lines
2961 .iter()
2962 .map(|l| {
2963 if l.starts_with(common_indent.as_str()) {
2964 &l[common_indent.len()..]
2965 } else {
2966 l.trim_start()
2967 }
2968 })
2969 .collect::<Vec<_>>()
2970 .join(" ")
2971 };
2972
2973 // Skip reflowing if this paragraph contains definition list items
2974 // Definition lists are multi-line structures that should not be joined
2975 let contains_definition_list = paragraph_lines
2976 .iter()
2977 .any(|line| crate::utils::is_definition_list_item(line));
2978
2979 if contains_definition_list {
2980 // Don't reflow definition lists - skip this paragraph
2981 i = paragraph_start + paragraph_lines.len();
2982 continue;
2983 }
2984
2985 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2986 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2987 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2988
2989 if contains_snippets {
2990 // Don't reflow Snippets blocks - skip this paragraph
2991 i = paragraph_start + paragraph_lines.len();
2992 continue;
2993 }
2994
2995 // Check if this paragraph needs reflowing
2996 let needs_reflow = match config.reflow_mode {
2997 ReflowMode::Normalize => self.normalize_mode_needs_reflow(paragraph_lines.iter().copied(), config),
2998 ReflowMode::SentencePerLine => {
2999 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
3000 // Note: we check the joined text because sentences can span multiple lines
3001 let sentences = split_into_sentences(¶graph_text);
3002
3003 // Always reflow if multiple sentences on one line
3004 if sentences.len() > 1 {
3005 true
3006 } else if paragraph_lines.len() > 1 {
3007 // For single-sentence paragraphs spanning multiple lines:
3008 // Reflow if they COULD fit on one line (respecting line-length constraint)
3009 if config.line_length.is_unlimited() {
3010 // No line-length constraint - always join single sentences
3011 true
3012 } else {
3013 // Only join if it fits within line-length.
3014 // paragraph_text has the common indent stripped, so add it
3015 // back to get the true output length before comparing.
3016 let effective_length =
3017 self.calculate_effective_length(¶graph_text) + common_indent.len();
3018 effective_length <= config.line_length.get()
3019 }
3020 } else {
3021 false
3022 }
3023 }
3024 ReflowMode::SemanticLineBreaks => {
3025 let sentences = split_into_sentences(¶graph_text);
3026 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
3027 sentences.len() > 1
3028 || paragraph_lines.len() > 1
3029 || paragraph_lines
3030 .iter()
3031 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
3032 }
3033 ReflowMode::Default => {
3034 // In default mode, only reflow if lines exceed limit
3035 paragraph_lines
3036 .iter()
3037 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
3038 }
3039 };
3040
3041 if needs_reflow {
3042 // Calculate byte range for this paragraph
3043 // Use whole_line_range for each line and combine
3044 let start_range = line_index.whole_line_range(paragraph_start + 1);
3045 let end_line = paragraph_start + paragraph_lines.len() - 1;
3046
3047 // For the last line, we want to preserve any trailing newline
3048 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
3049 // Last line without trailing newline - use line_text_range
3050 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
3051 } else {
3052 // Not the last line or has trailing newline - use whole_line_range
3053 line_index.whole_line_range(end_line + 1)
3054 };
3055
3056 let byte_range = start_range.start..end_range.end;
3057
3058 // Check if the paragraph ends with a hard break and what type
3059 let hard_break_type = paragraph_lines.last().and_then(|line| {
3060 let line = line.strip_suffix('\r').unwrap_or(line);
3061 if line.ends_with('\\') {
3062 Some("\\")
3063 } else if line.ends_with(" ") {
3064 Some(" ")
3065 } else {
3066 None
3067 }
3068 });
3069
3070 // Reflow the paragraph
3071 // When line_length = 0 (no limit), use a very large value for reflow
3072 let reflow_line_length = if config.line_length.is_unlimited() {
3073 usize::MAX
3074 } else {
3075 config.line_length.get()
3076 };
3077 let reflow_options = crate::utils::text_reflow::ReflowOptions {
3078 line_length: reflow_line_length,
3079 break_on_sentences: true,
3080 preserve_breaks: false,
3081 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
3082 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
3083 abbreviations: config.abbreviations_for_reflow(),
3084 length_mode: self.reflow_length_mode(),
3085 attr_lists: ctx.flavor.supports_attr_lists(),
3086 require_sentence_capital: config.require_sentence_capital,
3087 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
3088 Some(4)
3089 } else {
3090 None
3091 },
3092 };
3093 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
3094
3095 // Re-apply the common indent to each non-empty reflowed line so
3096 // that the replacement preserves the original structural indentation.
3097 if !common_indent.is_empty() {
3098 for line in &mut reflowed {
3099 if !line.is_empty() {
3100 *line = format!("{common_indent}{line}");
3101 }
3102 }
3103 }
3104
3105 // If the original paragraph ended with a hard break, preserve it
3106 // Preserve the original hard break format (backslash or two spaces)
3107 if let Some(break_marker) = hard_break_type
3108 && !reflowed.is_empty()
3109 {
3110 let last_idx = reflowed.len() - 1;
3111 if !has_hard_break(&reflowed[last_idx]) {
3112 reflowed[last_idx].push_str(break_marker);
3113 }
3114 }
3115
3116 let reflowed_text = reflowed.join(line_ending);
3117
3118 // Preserve trailing newline if the original paragraph had one
3119 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
3120 format!("{reflowed_text}{line_ending}")
3121 } else {
3122 reflowed_text
3123 };
3124
3125 // Get the original text to compare
3126 let original_text = &ctx.content[byte_range.clone()];
3127
3128 // Only generate a warning if the replacement is different from the original
3129 if original_text != replacement {
3130 // Create warning with actual fix
3131 // In default mode, report the specific line that violates
3132 // In normalize mode, report the whole paragraph
3133 // In sentence-per-line mode, report the entire paragraph
3134 let (warning_line, warning_end_line) = match config.reflow_mode {
3135 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
3136 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
3137 // Highlight the entire paragraph that needs reformatting
3138 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
3139 }
3140 ReflowMode::Default => {
3141 // Find the first line that exceeds the limit
3142 let mut violating_line = paragraph_start;
3143 for (idx, line) in paragraph_lines.iter().enumerate() {
3144 if self.calculate_effective_length(line) > config.line_length.get() {
3145 violating_line = paragraph_start + idx;
3146 break;
3147 }
3148 }
3149 (violating_line + 1, violating_line + 1)
3150 }
3151 };
3152
3153 warnings.push(LintWarning {
3154 rule_name: Some(self.name().to_string()),
3155 message: match config.reflow_mode {
3156 ReflowMode::Normalize => format!(
3157 "Paragraph could be normalized to use line length of {} characters",
3158 config.line_length.get()
3159 ),
3160 ReflowMode::SentencePerLine => {
3161 let num_sentences = split_into_sentences(¶graph_text).len();
3162 if paragraph_lines.len() == 1 {
3163 // Single line with multiple sentences
3164 format!("Line contains {num_sentences} sentences (one sentence per line required)")
3165 } else {
3166 let num_lines = paragraph_lines.len();
3167 // Multiple lines - could be split sentences or mixed
3168 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
3169 }
3170 },
3171 ReflowMode::SemanticLineBreaks => {
3172 let num_sentences = split_into_sentences(¶graph_text).len();
3173 format!(
3174 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
3175 )
3176 },
3177 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
3178 },
3179 line: warning_line,
3180 column: 1,
3181 end_line: warning_end_line,
3182 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
3183 severity: Severity::Warning,
3184 fix: Some(crate::rule::Fix {
3185 range: byte_range,
3186 replacement,
3187 }),
3188 });
3189 }
3190 }
3191 }
3192
3193 warnings
3194 }
3195
3196 /// Calculate string length based on the configured length mode
3197 fn calculate_string_length(&self, s: &str) -> usize {
3198 match self.config.length_mode {
3199 LengthMode::Chars => s.chars().count(),
3200 LengthMode::Visual => s.width(),
3201 LengthMode::Bytes => s.len(),
3202 }
3203 }
3204
/// Calculate the effective length of a line.
///
/// Currently a plain delegation to `calculate_string_length`: the whole of
/// `line` is measured in the configured length mode (chars, visual width, or
/// bytes) and no part of the line is excluded or adjusted here.
fn calculate_effective_length(&self, line: &str) -> usize {
    self.calculate_string_length(line)
}
3211
3212 /// Calculate line length with inline link/image URLs removed.
3213 ///
3214 /// For each inline link `[text](url)` or image `` on the line,
3215 /// computes the "savings" from removing the URL portion (keeping only `[text]`
3216 /// or `![alt]`). Returns `effective_length - total_savings`.
3217 ///
3218 /// Handles nested constructs (e.g., `[](url)`) by only counting the
3219 /// outermost construct to avoid double-counting.
3220 fn calculate_text_only_length(
3221 &self,
3222 effective_length: usize,
3223 line_number: usize,
3224 ctx: &crate::lint_context::LintContext,
3225 ) -> usize {
3226 let line_range = ctx.line_index.line_content_range(line_number);
3227 let line_byte_end = line_range.end;
3228
3229 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
3230 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
3231
3232 // Binary search: links are sorted by byte_offset, so link.line is non-decreasing
3233 let link_start = ctx.links.partition_point(|l| l.line < line_number);
3234 for link in &ctx.links[link_start..] {
3235 if link.line != line_number {
3236 break;
3237 }
3238 if link.is_reference {
3239 continue;
3240 }
3241 if !matches!(link.link_type, LinkType::Inline) {
3242 continue;
3243 }
3244 if link.byte_end > line_byte_end {
3245 continue;
3246 }
3247 let text_only_len = 2 + self.calculate_string_length(&link.text);
3248 constructs.push((link.byte_offset, link.byte_end, text_only_len));
3249 }
3250
3251 let img_start = ctx.images.partition_point(|i| i.line < line_number);
3252 for image in &ctx.images[img_start..] {
3253 if image.line != line_number {
3254 break;
3255 }
3256 if image.is_reference {
3257 continue;
3258 }
3259 if !matches!(image.link_type, LinkType::Inline) {
3260 continue;
3261 }
3262 if image.byte_end > line_byte_end {
3263 continue;
3264 }
3265 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
3266 constructs.push((image.byte_offset, image.byte_end, text_only_len));
3267 }
3268
3269 if constructs.is_empty() {
3270 return effective_length;
3271 }
3272
3273 // Sort by byte offset to handle overlapping/nested constructs
3274 constructs.sort_by_key(|&(start, _, _)| start);
3275
3276 let mut total_savings: usize = 0;
3277 let mut last_end: usize = 0;
3278
3279 for (start, end, text_only_len) in &constructs {
3280 // Skip constructs nested inside a previously counted one
3281 if *start < last_end {
3282 continue;
3283 }
3284 // Full construct length in configured length mode
3285 let full_source = &ctx.content[*start..*end];
3286 let full_len = self.calculate_string_length(full_source);
3287 total_savings += full_len.saturating_sub(*text_only_len);
3288 last_end = *end;
3289 }
3290
3291 effective_length.saturating_sub(total_savings)
3292 }
3293}