rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod helpers;
22pub mod md013_config;
23use crate::utils::is_template_directive_only;
24use helpers::{
25 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_html_only_line,
26 is_list_item, is_standalone_link_or_image_line, split_into_segments, trim_preserving_hard_break,
27};
28pub use md013_config::MD013Config;
29use md013_config::{LengthMode, ReflowMode};
30
31#[cfg(test)]
32mod tests;
33use unicode_width::UnicodeWidthStr;
34
35#[derive(Clone, Default)]
36pub struct MD013LineLength {
37 pub(crate) config: MD013Config,
38}
39
40/// Blockquote paragraph line collected for reflow, with original line index for range computation.
41struct CollectedBlockquoteLine {
42 line_idx: usize,
43 data: BlockquoteLineData,
44}
45
46impl MD013LineLength {
47 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
48 Self {
49 config: MD013Config {
50 line_length: crate::types::LineLength::new(line_length),
51 code_blocks,
52 tables,
53 headings,
54 paragraphs: true, // Default to true for backwards compatibility
55 blockquotes: true, // Default to true for backwards compatibility
56 strict,
57 reflow: false,
58 reflow_mode: ReflowMode::default(),
59 length_mode: LengthMode::default(),
60 abbreviations: Vec::new(),
61 require_sentence_capital: true,
62 },
63 }
64 }
65
66 pub fn from_config_struct(config: MD013Config) -> Self {
67 Self { config }
68 }
69
70 /// Return a clone with code block checking disabled.
71 /// Used for doc comment linting where code blocks are Rust code managed by rustfmt.
72 pub fn with_code_blocks_disabled(&self) -> Self {
73 let mut clone = self.clone();
74 clone.config.code_blocks = false;
75 clone
76 }
77
78 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
79 fn reflow_length_mode(&self) -> ReflowLengthMode {
80 match self.config.length_mode {
81 LengthMode::Chars => ReflowLengthMode::Chars,
82 LengthMode::Visual => ReflowLengthMode::Visual,
83 LengthMode::Bytes => ReflowLengthMode::Bytes,
84 }
85 }
86
87 fn should_ignore_line(
88 &self,
89 line: &str,
90 _lines: &[&str],
91 current_line: usize,
92 ctx: &crate::lint_context::LintContext,
93 ) -> bool {
94 if self.config.strict {
95 return false;
96 }
97
98 // Quick check for common patterns before expensive regex
99 let trimmed = line.trim();
100
101 // Only skip if the entire line is a URL (quick check first)
102 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
103 return true;
104 }
105
106 // Only skip if the entire line is an image reference (quick check first)
107 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
108 return true;
109 }
110
111 // Note: link reference definitions are handled as always-exempt (even in strict mode)
112 // in the main check loop, so they don't need to be checked here.
113
114 // Code blocks with long strings (only check if in code block)
115 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
116 && !trimmed.is_empty()
117 && !line.contains(' ')
118 && !line.contains('\t')
119 {
120 return true;
121 }
122
123 false
124 }
125
126 /// Check if rule should skip based on provided config (used for inline config support)
127 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
128 // Skip if content is empty
129 if ctx.content.is_empty() {
130 return true;
131 }
132
133 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
134 if config.reflow
135 && (config.reflow_mode == ReflowMode::SentencePerLine
136 || config.reflow_mode == ReflowMode::SemanticLineBreaks
137 || config.reflow_mode == ReflowMode::Normalize)
138 {
139 return false;
140 }
141
142 // Quick check: if total content is shorter than line limit, definitely skip
143 if ctx.content.len() <= config.line_length.get() {
144 return true;
145 }
146
147 // Skip if no line exceeds the limit
148 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
149 }
150
151 fn normalize_mode_needs_reflow<'a, I>(&self, lines: I, config: &MD013Config) -> bool
152 where
153 I: IntoIterator<Item = &'a str>,
154 {
155 let mut line_count = 0;
156 let check_length = !config.line_length.is_unlimited();
157
158 for line in lines {
159 line_count += 1;
160 if check_length && self.calculate_effective_length(line) > config.line_length.get() {
161 return true;
162 }
163 }
164
165 line_count > 1
166 }
167}
168
169impl Rule for MD013LineLength {
170 fn name(&self) -> &'static str {
171 "MD013"
172 }
173
174 fn description(&self) -> &'static str {
175 "Line length should not be excessive"
176 }
177
178 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
179 // Use pre-parsed inline config from LintContext
180 let config_override = ctx.inline_config().get_rule_config("MD013");
181
182 // Apply configuration override if present
183 let effective_config = if let Some(json_config) = config_override {
184 if let Some(obj) = json_config.as_object() {
185 let mut config = self.config.clone();
186 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
187 config.line_length = crate::types::LineLength::new(line_length as usize);
188 }
189 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
190 config.code_blocks = code_blocks;
191 }
192 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
193 config.tables = tables;
194 }
195 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
196 config.headings = headings;
197 }
198 if let Some(blockquotes) = obj.get("blockquotes").and_then(|v| v.as_bool()) {
199 config.blockquotes = blockquotes;
200 }
201 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
202 config.strict = strict;
203 }
204 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
205 config.reflow = reflow;
206 }
207 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
208 config.reflow_mode = match reflow_mode {
209 "default" => ReflowMode::Default,
210 "normalize" => ReflowMode::Normalize,
211 "sentence-per-line" => ReflowMode::SentencePerLine,
212 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
213 _ => ReflowMode::default(),
214 };
215 }
216 config
217 } else {
218 self.config.clone()
219 }
220 } else {
221 self.config.clone()
222 };
223
224 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
225 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
226 if self.should_skip_with_config(ctx, &effective_config)
227 && !(effective_config.reflow
228 && (effective_config.reflow_mode == ReflowMode::Normalize
229 || effective_config.reflow_mode == ReflowMode::SentencePerLine
230 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
231 {
232 return Ok(Vec::new());
233 }
234
235 // Direct implementation without DocumentStructure
236 let mut warnings = Vec::new();
237
238 // Special handling: line_length = 0 means "no line length limit"
239 // Skip all line length checks, but still allow reflow if enabled
240 let skip_length_checks = effective_config.line_length.is_unlimited();
241
242 // Pre-filter lines that could be problematic to avoid processing all lines
243 let mut candidate_lines = Vec::new();
244 if !skip_length_checks {
245 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
246 // Skip front matter - it should never be linted
247 if line_info.in_front_matter {
248 continue;
249 }
250
251 // Quick length check first
252 if line_info.byte_len > effective_config.line_length.get() {
253 candidate_lines.push(line_idx);
254 }
255 }
256 }
257
258 // If no candidate lines and not in normalize or sentence-per-line mode, early return
259 if candidate_lines.is_empty()
260 && !(effective_config.reflow
261 && (effective_config.reflow_mode == ReflowMode::Normalize
262 || effective_config.reflow_mode == ReflowMode::SentencePerLine
263 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
264 {
265 return Ok(warnings);
266 }
267
268 let lines = ctx.raw_lines();
269
270 // Create a quick lookup set for heading lines
271 // We need this for both the heading skip check AND the paragraphs check
272 let heading_lines_set: std::collections::HashSet<usize> = ctx
273 .lines
274 .iter()
275 .enumerate()
276 .filter(|(_, line)| line.heading.is_some())
277 .map(|(idx, _)| idx + 1)
278 .collect();
279
280 // Use pre-computed table blocks from context
281 // We need this for both the table skip check AND the paragraphs check
282 let table_blocks = &ctx.table_blocks;
283 let mut table_lines_set = std::collections::HashSet::new();
284 for table in table_blocks {
285 table_lines_set.insert(table.header_line + 1);
286 table_lines_set.insert(table.delimiter_line + 1);
287 for &line in &table.content_lines {
288 table_lines_set.insert(line + 1);
289 }
290 }
291
292 // Process candidate lines for line length checks
293 'line_loop: for &line_idx in &candidate_lines {
294 let line_number = line_idx + 1;
295 let line = lines[line_idx];
296
297 // Calculate actual line length (used in warning messages)
298 let effective_length = self.calculate_effective_length(line);
299
300 // Use single line length limit for all content
301 let line_limit = effective_config.line_length.get();
302
303 // In non-strict mode, forgive the trailing non-whitespace run.
304 // If the line only exceeds the limit because of a long token at the end
305 // (URL, link chain, identifier), it passes. This matches markdownlint's
306 // behavior: line.replace(/\S*$/u, "#")
307 let check_length = if effective_config.strict {
308 effective_length
309 } else {
310 match line.rfind(char::is_whitespace) {
311 Some(pos) => {
312 let ws_char = line[pos..].chars().next().unwrap();
313 let prefix_end = pos + ws_char.len_utf8();
314 self.calculate_string_length(&line[..prefix_end]) + 1
315 }
316 None => 1, // No whitespace — entire line is a single token
317 }
318 };
319
320 // Skip lines where the check length is within the limit
321 if check_length <= line_limit {
322 continue;
323 }
324
325 // Semantic link understanding: suppress when excess comes entirely from inline URLs
326 if !effective_config.strict {
327 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
328 if text_only_length <= line_limit {
329 continue;
330 }
331 }
332
333 // Skip mkdocstrings and pymdown blocks (already handled by LintContext)
334 if ctx.lines[line_idx].in_mkdocstrings || ctx.lines[line_idx].in_pymdown_block {
335 continue;
336 }
337
338 // Link reference definitions are always exempt, even in strict mode.
339 // There's no way to shorten them without breaking the URL.
340 // Also check after stripping list markers, since list items may
341 // contain link ref defs as their content.
342 {
343 let trimmed = line.trim();
344 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
345 continue;
346 }
347 if is_list_item(trimmed) {
348 let (_, content) = extract_list_marker_and_content(trimmed);
349 let content_trimmed = content.trim();
350 if content_trimmed.starts_with('[')
351 && content_trimmed.contains("]:")
352 && LINK_REF_PATTERN.is_match(content_trimmed)
353 {
354 continue;
355 }
356 }
357 }
358
359 // Skip various block types efficiently
360 if !effective_config.strict {
361 // Lines whose only content is a link/image are exempt.
362 // After stripping list markers, blockquote markers, and emphasis,
363 // if only a link or image remains, there is no way to shorten it.
364 if is_standalone_link_or_image_line(line) {
365 continue;
366 }
367
368 // Lines consisting entirely of HTML tags are exempt.
369 // Badge lines, images with attributes, and similar inline HTML
370 // are long due to URLs in attributes and can't be meaningfully shortened.
371 if is_html_only_line(line) {
372 continue;
373 }
374
375 // Skip setext heading underlines
376 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
377 continue;
378 }
379
380 // Skip block elements according to config flags
381 // The flags mean: true = check these elements, false = skip these elements
382 // So we skip when the flag is FALSE and the line is in that element type
383 if (!effective_config.headings && heading_lines_set.contains(&line_number))
384 || (!effective_config.code_blocks
385 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
386 || (!effective_config.tables && table_lines_set.contains(&line_number))
387 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
388 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
389 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
390 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
391 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_block)
392 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
393 || ctx.line_info(line_number).is_some_and(|info| info.in_pymdown_block)
394 {
395 continue;
396 }
397
398 // Check if this is a paragraph/regular text line
399 // If paragraphs = false, skip lines that are NOT in special blocks
400 // Blockquote content is treated as paragraph text, so it's not
401 // included in the special blocks list here.
402 if !effective_config.paragraphs {
403 let is_special_block = heading_lines_set.contains(&line_number)
404 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
405 || table_lines_set.contains(&line_number)
406 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
407 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
408 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
409 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
410 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_block)
411 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
412 || ctx
413 .line_info(line_number)
414 .is_some_and(|info| info.in_mkdocs_container());
415
416 // Skip regular paragraph text when paragraphs = false
417 if !is_special_block {
418 continue;
419 }
420 }
421
422 // Skip blockquote lines when blockquotes = false.
423 // Also skip lazy continuation lines that belong to a blockquote
424 // (lines without `>` prefix that follow a blockquote line).
425 if !effective_config.blockquotes {
426 if ctx.lines[line_number - 1].blockquote.is_some() {
427 continue;
428 }
429 // Check for lazy continuation: scan backwards through
430 // non-blank lines to find if this paragraph started with
431 // a blockquote marker
432 if !line.trim().is_empty() {
433 let mut scan = line_number.saturating_sub(2);
434 loop {
435 if ctx.lines[scan].blockquote.is_some() {
436 // Found a blockquote ancestor — this is a lazy continuation
437 continue 'line_loop;
438 }
439 if lines[scan].trim().is_empty() || scan == 0 {
440 break;
441 }
442 scan -= 1;
443 }
444 }
445 }
446
447 // Skip lines that are only a URL, image ref, or link ref
448 if self.should_ignore_line(line, lines, line_idx, ctx) {
449 continue;
450 }
451 }
452
453 // In sentence-per-line mode, check if this is a single long sentence
454 // If so, emit a warning without a fix (user must manually rephrase)
455 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
456 let sentences = split_into_sentences(line.trim());
457 if sentences.len() == 1 {
458 // Single sentence that's too long - warn but don't auto-fix
459 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
460
461 let (start_line, start_col, end_line, end_col) =
462 calculate_excess_range(line_number, line, line_limit);
463
464 warnings.push(LintWarning {
465 rule_name: Some(self.name().to_string()),
466 message,
467 line: start_line,
468 column: start_col,
469 end_line,
470 end_column: end_col,
471 severity: Severity::Warning,
472 fix: None, // No auto-fix for long single sentences
473 });
474 continue;
475 }
476 // Multiple sentences will be handled by paragraph-based reflow
477 continue;
478 }
479
480 // In semantic-line-breaks mode, skip per-line checks —
481 // all reflow is handled at the paragraph level with cascading splits
482 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
483 continue;
484 }
485
486 // Don't provide fix for individual lines when reflow is enabled
487 // Paragraph-based fixes will be handled separately
488 let fix = None;
489
490 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
491
492 // Calculate precise character range for the excess portion
493 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
494
495 warnings.push(LintWarning {
496 rule_name: Some(self.name().to_string()),
497 message,
498 line: start_line,
499 column: start_col,
500 end_line,
501 end_column: end_col,
502 severity: Severity::Warning,
503 fix,
504 });
505 }
506
507 // If reflow is enabled, generate paragraph-based fixes
508 if effective_config.reflow {
509 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
510 // Merge paragraph warnings with line warnings, removing duplicates
511 for pw in paragraph_warnings {
512 // Remove any line warnings that overlap with this paragraph
513 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
514 warnings.push(pw);
515 }
516 }
517
518 Ok(warnings)
519 }
520
521 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
522 // For CLI usage, apply fixes from warnings
523 // LSP will use the warning-based fixes directly
524 let warnings = self.check(ctx)?;
525 let warnings =
526 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
527
528 // If there are no fixes, return content unchanged
529 if !warnings.iter().any(|w| w.fix.is_some()) {
530 return Ok(ctx.content.to_string());
531 }
532
533 // Apply warning-based fixes
534 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
535 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
536 }
537
538 fn as_any(&self) -> &dyn std::any::Any {
539 self
540 }
541
542 fn category(&self) -> RuleCategory {
543 RuleCategory::Whitespace
544 }
545
546 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
547 self.should_skip_with_config(ctx, &self.config)
548 }
549
550 fn default_config_section(&self) -> Option<(String, toml::Value)> {
551 let default_config = MD013Config::default();
552 let json_value = serde_json::to_value(&default_config).ok()?;
553 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
554
555 if let toml::Value::Table(table) = toml_value {
556 if !table.is_empty() {
557 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
558 } else {
559 None
560 }
561 } else {
562 None
563 }
564 }
565
566 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
567 let mut aliases = std::collections::HashMap::new();
568 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
569 aliases.insert("strict_sentences".to_string(), "require-sentence-capital".to_string());
570 aliases.insert("strict-sentences".to_string(), "require-sentence-capital".to_string());
571 Some(aliases)
572 }
573
574 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
575 where
576 Self: Sized,
577 {
578 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
579 // Use global line_length if rule-specific config still has default value
580 if rule_config.line_length.get() == 80 {
581 rule_config.line_length = config.global.line_length;
582 }
583 Box::new(Self::from_config_struct(rule_config))
584 }
585}
586
587impl MD013LineLength {
588 fn is_blockquote_content_boundary(
589 &self,
590 content: &str,
591 line_num: usize,
592 ctx: &crate::lint_context::LintContext,
593 ) -> bool {
594 let trimmed = content.trim();
595
596 trimmed.is_empty()
597 || ctx.line_info(line_num).is_some_and(|info| {
598 info.in_code_block
599 || info.in_front_matter
600 || info.in_html_block
601 || info.in_html_comment
602 || info.in_esm_block
603 || info.in_jsx_expression
604 || info.in_jsx_block
605 || info.in_mdx_comment
606 || info.in_mkdocstrings
607 || info.in_pymdown_block
608 || info.in_mkdocs_container()
609 || info.is_div_marker
610 })
611 || trimmed.starts_with('#')
612 || trimmed.starts_with("```")
613 || trimmed.starts_with("~~~")
614 || trimmed.starts_with('>')
615 || TableUtils::is_potential_table_row(content)
616 || is_list_item(trimmed)
617 || is_horizontal_rule(trimmed)
618 || (trimmed.starts_with('[') && content.contains("]:"))
619 || is_template_directive_only(content)
620 || is_standalone_attr_list(content)
621 || is_snippet_block_delimiter(content)
622 || is_github_alert_marker(trimmed)
623 || is_html_only_line(content)
624 }
625
626 fn generate_blockquote_paragraph_fix(
627 &self,
628 ctx: &crate::lint_context::LintContext,
629 config: &MD013Config,
630 lines: &[&str],
631 line_index: &LineIndex,
632 start_idx: usize,
633 line_ending: &str,
634 ) -> (Option<LintWarning>, usize) {
635 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
636 return (None, start_idx + 1);
637 };
638 let target_level = start_bq.nesting_level;
639
640 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
641 let mut i = start_idx;
642
643 while i < lines.len() {
644 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
645 break;
646 }
647
648 let line_num = i + 1;
649 if line_num > ctx.lines.len() {
650 break;
651 }
652
653 if lines[i].trim().is_empty() {
654 break;
655 }
656
657 let line_bq = ctx.lines[i].blockquote.as_deref();
658 if let Some(bq) = line_bq {
659 if bq.nesting_level != target_level {
660 break;
661 }
662
663 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
664 break;
665 }
666
667 collected.push(CollectedBlockquoteLine {
668 line_idx: i,
669 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
670 });
671 i += 1;
672 continue;
673 }
674
675 let lazy_content = lines[i].trim_start();
676 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
677 break;
678 }
679
680 collected.push(CollectedBlockquoteLine {
681 line_idx: i,
682 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
683 });
684 i += 1;
685 }
686
687 if collected.is_empty() {
688 return (None, start_idx + 1);
689 }
690
691 let next_idx = i;
692 let paragraph_start = collected[0].line_idx;
693 let end_line = collected[collected.len() - 1].line_idx;
694 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
695 let paragraph_text = line_data
696 .iter()
697 .map(|d| d.content.as_str())
698 .collect::<Vec<_>>()
699 .join(" ");
700
701 let contains_definition_list = line_data
702 .iter()
703 .any(|d| crate::utils::is_definition_list_item(&d.content));
704 if contains_definition_list {
705 return (None, next_idx);
706 }
707
708 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
709 if contains_snippets {
710 return (None, next_idx);
711 }
712
713 let needs_reflow = match config.reflow_mode {
714 ReflowMode::Normalize => {
715 self.normalize_mode_needs_reflow(line_data.iter().map(|d| d.content.as_str()), config)
716 }
717 ReflowMode::SentencePerLine => {
718 let sentences = split_into_sentences(¶graph_text);
719 sentences.len() > 1 || line_data.len() > 1
720 }
721 ReflowMode::SemanticLineBreaks => {
722 let sentences = split_into_sentences(¶graph_text);
723 sentences.len() > 1
724 || line_data.len() > 1
725 || collected
726 .iter()
727 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
728 }
729 ReflowMode::Default => collected
730 .iter()
731 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
732 };
733
734 if !needs_reflow {
735 return (None, next_idx);
736 }
737
738 let fallback_prefix = start_bq.prefix.clone();
739 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
740 let continuation_style = blockquote_continuation_style(&line_data);
741
742 let reflow_line_length = if config.line_length.is_unlimited() {
743 usize::MAX
744 } else {
745 config
746 .line_length
747 .get()
748 .saturating_sub(self.calculate_string_length(&explicit_prefix))
749 .max(1)
750 };
751
752 let reflow_options = crate::utils::text_reflow::ReflowOptions {
753 line_length: reflow_line_length,
754 break_on_sentences: true,
755 preserve_breaks: false,
756 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
757 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
758 abbreviations: config.abbreviations_for_reflow(),
759 length_mode: self.reflow_length_mode(),
760 attr_lists: ctx.flavor.supports_attr_lists(),
761 require_sentence_capital: config.require_sentence_capital,
762 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
763 Some(4)
764 } else {
765 None
766 },
767 };
768
769 let reflowed_with_style =
770 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
771
772 if reflowed_with_style.is_empty() {
773 return (None, next_idx);
774 }
775
776 let reflowed_text = reflowed_with_style.join(line_ending);
777
778 let start_range = line_index.whole_line_range(paragraph_start + 1);
779 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
780 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
781 } else {
782 line_index.whole_line_range(end_line + 1)
783 };
784 let byte_range = start_range.start..end_range.end;
785
786 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
787 format!("{reflowed_text}{line_ending}")
788 } else {
789 reflowed_text
790 };
791
792 let original_text = &ctx.content[byte_range.clone()];
793 if original_text == replacement {
794 return (None, next_idx);
795 }
796
797 let (warning_line, warning_end_line) = match config.reflow_mode {
798 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
799 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
800 ReflowMode::Default => {
801 let violating_line = collected
802 .iter()
803 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
804 .map(|line| line.line_idx + 1)
805 .unwrap_or(paragraph_start + 1);
806 (violating_line, violating_line)
807 }
808 };
809
810 let warning = LintWarning {
811 rule_name: Some(self.name().to_string()),
812 message: match config.reflow_mode {
813 ReflowMode::Normalize => format!(
814 "Paragraph could be normalized to use line length of {} characters",
815 config.line_length.get()
816 ),
817 ReflowMode::SentencePerLine => {
818 let num_sentences = split_into_sentences(¶graph_text).len();
819 if line_data.len() == 1 {
820 format!("Line contains {num_sentences} sentences (one sentence per line required)")
821 } else {
822 let num_lines = line_data.len();
823 format!(
824 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
825 )
826 }
827 }
828 ReflowMode::SemanticLineBreaks => {
829 let num_sentences = split_into_sentences(¶graph_text).len();
830 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
831 }
832 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
833 },
834 line: warning_line,
835 column: 1,
836 end_line: warning_end_line,
837 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
838 severity: Severity::Warning,
839 fix: Some(crate::rule::Fix {
840 range: byte_range,
841 replacement,
842 }),
843 };
844
845 (Some(warning), next_idx)
846 }
847
848 /// Generate paragraph-based fixes
849 fn generate_paragraph_fixes(
850 &self,
851 ctx: &crate::lint_context::LintContext,
852 config: &MD013Config,
853 lines: &[&str],
854 ) -> Vec<LintWarning> {
855 let mut warnings = Vec::new();
856 let line_index = LineIndex::new(ctx.content);
857
858 // Detect the content's line ending style to preserve it in replacements.
859 // The LSP receives content from editors which may use CRLF (Windows).
860 // Replacements must match the original line endings to avoid false positives.
861 let line_ending = crate::utils::line_ending::detect_line_ending(ctx.content);
862
863 let mut i = 0;
864 while i < lines.len() {
865 let line_num = i + 1;
866
867 // Handle blockquote paragraphs with style-preserving reflow.
868 // Skip blockquotes when blockquotes=false or paragraphs=false
869 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
870 if !config.blockquotes || !config.paragraphs {
871 // Skip past all blockquote lines (explicit and lazy continuations).
872 // A lazy continuation is a non-blank line without `>` that follows
873 // a blockquote line and isn't a structural element.
874 let mut saw_explicit_bq = false;
875 while i < lines.len() && i < ctx.lines.len() {
876 if ctx.lines[i].blockquote.is_some() {
877 saw_explicit_bq = true;
878 i += 1;
879 } else if saw_explicit_bq
880 && !lines[i].trim().is_empty()
881 && !lines[i].trim_start().starts_with('#')
882 && !lines[i].trim_start().starts_with('>')
883 {
884 // Lazy continuation of preceding blockquote
885 i += 1;
886 } else {
887 break;
888 }
889 }
890 continue;
891 }
892 let (warning, next_idx) =
893 self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i, line_ending);
894 if let Some(warning) = warning {
895 warnings.push(warning);
896 }
897 i = next_idx;
898 continue;
899 }
900
901 // Skip special structures (but NOT MkDocs containers - those get special handling)
902 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
903 info.in_code_block
904 || info.in_front_matter
905 || info.in_html_block
906 || info.in_html_comment
907 || info.in_esm_block
908 || info.in_jsx_expression
909 || info.in_jsx_block
910 || info.in_mdx_comment
911 || info.in_mkdocstrings
912 || info.in_pymdown_block
913 });
914
915 // Skip link reference definitions but NOT footnote definitions.
916 // Footnote definitions (`[^id]: prose`) contain reflowable text,
917 // while link reference definitions (`[ref]: URL`) contain URLs
918 // that cannot be shortened.
919 let is_link_ref_def =
920 lines[i].trim().starts_with('[') && !lines[i].trim().starts_with("[^") && lines[i].contains("]:");
921
922 if should_skip_due_to_line_info
923 || lines[i].trim().starts_with('#')
924 || TableUtils::is_potential_table_row(lines[i])
925 || lines[i].trim().is_empty()
926 || is_horizontal_rule(lines[i].trim())
927 || is_template_directive_only(lines[i])
928 || is_link_ref_def
929 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
930 || is_html_only_line(lines[i])
931 {
932 i += 1;
933 continue;
934 }
935
936 // Handle footnote definitions: `[^id]: prose text that can be reflowed`
937 // Supports multi-paragraph footnotes with code blocks, blockquotes,
938 // tables, and lists preserved verbatim.
939 // Validate structure: must start with `[^`, contain `]:`, and the ID
940 // must not contain `[` or `]` (prevents false matches on nested brackets)
941 if lines[i].trim().starts_with("[^") && lines[i].contains("]:") && {
942 let after_caret = &lines[i].trim()[2..];
943 after_caret
944 .find("]:")
945 .is_some_and(|pos| pos > 0 && !after_caret[..pos].contains(['[', ']']))
946 } {
947 let footnote_start = i;
948 let line = lines[i];
949
950 // Extract the prefix `[^id]:`
951 let Some(colon_pos) = line.find("]:") else {
952 i += 1;
953 continue;
954 };
955 let prefix_end = colon_pos + 2;
956 let prefix = &line[..prefix_end];
957
958 // Content starts after `]: ` (with optional space)
959 let content_start = if line[prefix_end..].starts_with(' ') {
960 prefix_end + 1
961 } else {
962 prefix_end
963 };
964 let first_content = &line[content_start..];
965
966 // CommonMark footnotes use 4-space continuation indent
967 const FN_INDENT: usize = 4;
968
969 // --- Line classification for footnote content ---
970 #[derive(Debug, Clone)]
971 enum FnLineType {
972 Content(String),
973 Verbatim(String, usize), // preserved text, original indent
974 Empty,
975 }
976
977 // Helper: compute visual indent (tabs = 4 spaces)
978 let visual_indent = |s: &str| -> usize {
979 s.chars()
980 .take_while(|c| c.is_whitespace())
981 .map(|c| if c == '\t' { 4 } else { 1 })
982 .sum::<usize>()
983 };
984
985 // Helper: check if a trimmed line is a fence marker (homogeneous chars)
986 let is_fence = |s: &str| -> bool {
987 let t = s.trim();
988 let fence_char = t.chars().next();
989 matches!(fence_char, Some('`') | Some('~'))
990 && t.chars().take_while(|&c| c == fence_char.unwrap()).count() >= 3
991 };
992
993 // Helper: check if a trimmed line is a setext underline
994 let is_setext_underline = |s: &str| -> bool {
995 let t = s.trim();
996 !t.is_empty()
997 && (t.chars().all(|c| c == '=' || c == ' ') || t.chars().all(|c| c == '-' || c == ' '))
998 && t.contains(['=', '-'])
999 };
1000
1001 // Deferred body: `[^id]:\n content` — first line has no content,
1002 // actual content starts on the next indented line
1003 let deferred_body = first_content.trim().is_empty();
1004
1005 // Collect all lines belonging to this footnote definition
1006 let mut fn_lines: Vec<FnLineType> = Vec::new();
1007 if !deferred_body {
1008 fn_lines.push(FnLineType::Content(first_content.to_string()));
1009 }
1010 let mut last_consumed = i;
1011 i += 1;
1012
1013 // Strip only the footnote continuation indent, preserving
1014 // internal indentation (e.g., code block body indent)
1015 let strip_fn_indent = |s: &str| -> String {
1016 let mut chars = s.chars();
1017 let mut stripped = 0;
1018 while stripped < FN_INDENT {
1019 match chars.next() {
1020 Some('\t') => stripped += 4,
1021 Some(c) if c.is_whitespace() => stripped += 1,
1022 _ => break,
1023 }
1024 }
1025 chars.as_str().to_string()
1026 };
1027
1028 let mut in_fenced_code = false;
1029 let mut consecutive_blanks = 0u32;
1030
1031 while i < lines.len() {
1032 let next = lines[i];
1033 let next_trimmed = next.trim();
1034
1035 // Blank line handling
1036 if next_trimmed.is_empty() {
1037 consecutive_blanks += 1;
1038 // 2+ consecutive blanks terminate the footnote
1039 if consecutive_blanks >= 2 {
1040 break;
1041 }
1042
1043 // Inside a fenced code block, blank lines are part of the code
1044 if in_fenced_code {
1045 consecutive_blanks = 0; // Don't count blanks inside code blocks
1046 fn_lines.push(FnLineType::Verbatim(String::new(), 0));
1047 last_consumed = i;
1048 i += 1;
1049 continue;
1050 }
1051
1052 // Peek ahead: if next non-blank line is indented >= FN_INDENT,
1053 // this blank is an internal paragraph separator
1054 if i + 1 < lines.len() {
1055 let peek = lines[i + 1];
1056 let peek_indent = visual_indent(peek);
1057 if !peek.trim().is_empty() && peek_indent >= FN_INDENT {
1058 fn_lines.push(FnLineType::Empty);
1059 last_consumed = i;
1060 i += 1;
1061 continue;
1062 }
1063 }
1064 // No valid continuation after blank — end of footnote
1065 break;
1066 }
1067
1068 consecutive_blanks = 0;
1069 let indent = visual_indent(next);
1070
1071 // Not indented enough — end of footnote
1072 if indent < FN_INDENT {
1073 break;
1074 }
1075
1076 // Inside a fenced code block: everything is verbatim until closing fence
1077 if in_fenced_code {
1078 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1079 if is_fence(next_trimmed) {
1080 in_fenced_code = false;
1081 }
1082 last_consumed = i;
1083 i += 1;
1084 continue;
1085 }
1086
1087 // Fence opener — start verbatim code block
1088 if is_fence(next_trimmed) {
1089 in_fenced_code = true;
1090 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1091 last_consumed = i;
1092 i += 1;
1093 continue;
1094 }
1095
1096 // Indented code block: indent >= FN_INDENT + 4 (= 8 spaces)
1097 if indent >= FN_INDENT + 4 {
1098 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1099 last_consumed = i;
1100 i += 1;
1101 continue;
1102 }
1103
1104 // Structural content that must be preserved verbatim
1105 if next_trimmed.starts_with('#')
1106 || is_list_item(next_trimmed)
1107 || next_trimmed.starts_with('>')
1108 || TableUtils::is_potential_table_row(next_trimmed)
1109 || is_setext_underline(next_trimmed)
1110 || is_horizontal_rule(next_trimmed)
1111 || crate::utils::mkdocs_footnotes::is_footnote_definition(next_trimmed)
1112 {
1113 // Preserve verbatim: blockquotes, tables, lists, setext
1114 // underlines, and horizontal rules inside the footnote
1115 if next_trimmed.starts_with('>')
1116 || TableUtils::is_potential_table_row(next_trimmed)
1117 || is_list_item(next_trimmed)
1118 || is_setext_underline(next_trimmed)
1119 || is_horizontal_rule(next_trimmed)
1120 {
1121 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1122 last_consumed = i;
1123 i += 1;
1124 continue;
1125 }
1126 // Headings, new footnote defs, link refs — end the footnote
1127 break;
1128 }
1129
1130 // Link reference definitions inside footnotes are not reflowable
1131 if next_trimmed.starts_with('[')
1132 && !next_trimmed.starts_with("[^")
1133 && next_trimmed.contains("]:")
1134 && LINK_REF_PATTERN.is_match(next_trimmed)
1135 {
1136 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1137 last_consumed = i;
1138 i += 1;
1139 continue;
1140 }
1141
1142 // HTML-only lines inside footnotes are not reflowable
1143 if is_html_only_line(next_trimmed) {
1144 fn_lines.push(FnLineType::Verbatim(strip_fn_indent(next), indent));
1145 last_consumed = i;
1146 i += 1;
1147 continue;
1148 }
1149
1150 // Regular prose content
1151 fn_lines.push(FnLineType::Content(next_trimmed.to_string()));
1152 last_consumed = i;
1153 i += 1;
1154 }
1155
1156 // Nothing collected or only empty lines
1157 if fn_lines.iter().all(|l| matches!(l, FnLineType::Empty)) || fn_lines.is_empty() {
1158 continue;
1159 }
1160
1161 // --- Group into blocks ---
1162 #[derive(Debug)]
1163 enum FnBlock {
1164 Paragraph(Vec<String>),
1165 Verbatim(Vec<(String, usize)>), // (content, indent) preserved as-is
1166 }
1167
1168 let mut blocks: Vec<FnBlock> = Vec::new();
1169 let mut current_para: Vec<String> = Vec::new();
1170 let mut current_verbatim: Vec<(String, usize)> = Vec::new();
1171
1172 for fl in &fn_lines {
1173 match fl {
1174 FnLineType::Content(s) => {
1175 if !current_verbatim.is_empty() {
1176 blocks.push(FnBlock::Verbatim(std::mem::take(&mut current_verbatim)));
1177 }
1178 current_para.push(s.clone());
1179 }
1180 FnLineType::Verbatim(s, indent) => {
1181 if !current_para.is_empty() {
1182 blocks.push(FnBlock::Paragraph(std::mem::take(&mut current_para)));
1183 }
1184 current_verbatim.push((s.clone(), *indent));
1185 }
1186 FnLineType::Empty => {
1187 if !current_para.is_empty() {
1188 blocks.push(FnBlock::Paragraph(std::mem::take(&mut current_para)));
1189 }
1190 if !current_verbatim.is_empty() {
1191 blocks.push(FnBlock::Verbatim(std::mem::take(&mut current_verbatim)));
1192 }
1193 }
1194 }
1195 }
1196 if !current_para.is_empty() {
1197 blocks.push(FnBlock::Paragraph(current_para));
1198 }
1199 if !current_verbatim.is_empty() {
1200 blocks.push(FnBlock::Verbatim(current_verbatim));
1201 }
1202
1203 // --- Reflow paragraphs and reconstruct ---
1204 let prefix_display_width = prefix.chars().count() + 1; // +1 for space
1205 let reflow_line_length = if config.line_length.is_unlimited() {
1206 usize::MAX
1207 } else {
1208 config
1209 .line_length
1210 .get()
1211 .saturating_sub(FN_INDENT.max(prefix_display_width))
1212 .max(20)
1213 };
1214 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1215 line_length: reflow_line_length,
1216 break_on_sentences: true,
1217 preserve_breaks: false,
1218 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1219 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1220 abbreviations: config.abbreviations_for_reflow(),
1221 length_mode: self.reflow_length_mode(),
1222 attr_lists: ctx.flavor.supports_attr_lists(),
1223 require_sentence_capital: config.require_sentence_capital,
1224 max_list_continuation_indent: None,
1225 };
1226
1227 let indent_str = " ".repeat(FN_INDENT);
1228 let mut result_lines: Vec<String> = Vec::new();
1229 let mut is_first_block = true;
1230
1231 for block in &blocks {
1232 match block {
1233 FnBlock::Paragraph(para_lines) => {
1234 let paragraph_text = para_lines.join(" ");
1235 let paragraph_text = paragraph_text.trim();
1236 if paragraph_text.is_empty() {
1237 continue;
1238 }
1239
1240 let reflowed = crate::utils::text_reflow::reflow_line(paragraph_text, &reflow_options);
1241 if reflowed.is_empty() {
1242 continue;
1243 }
1244
1245 // Blank line separator between blocks
1246 if !result_lines.is_empty() {
1247 result_lines.push(String::new());
1248 }
1249
1250 for (idx, rline) in reflowed.iter().enumerate() {
1251 if is_first_block && idx == 0 {
1252 result_lines.push(format!("{prefix} {rline}"));
1253 } else {
1254 result_lines.push(format!("{indent_str}{rline}"));
1255 }
1256 }
1257 is_first_block = false;
1258 }
1259 FnBlock::Verbatim(verb_lines) => {
1260 // Blank line separator between blocks
1261 if !result_lines.is_empty() {
1262 result_lines.push(String::new());
1263 }
1264
1265 if is_first_block {
1266 // Verbatim as first block in a deferred-body footnote
1267 if deferred_body {
1268 result_lines.push(prefix.to_string());
1269 }
1270 is_first_block = false;
1271 }
1272 for (content, _orig_indent) in verb_lines {
1273 result_lines.push(format!("{indent_str}{content}"));
1274 }
1275 }
1276 }
1277 }
1278
1279 // If nothing was produced, skip
1280 if result_lines.is_empty() {
1281 continue;
1282 }
1283
1284 let reflowed_text = result_lines.join(line_ending);
1285
1286 // Calculate byte range using last_consumed
1287 let start_range = line_index.whole_line_range(footnote_start + 1);
1288 let end_range = if last_consumed == lines.len() - 1 && !ctx.content.ends_with('\n') {
1289 line_index.line_text_range(last_consumed + 1, 1, lines[last_consumed].len() + 1)
1290 } else {
1291 line_index.whole_line_range(last_consumed + 1)
1292 };
1293 let byte_range = start_range.start..end_range.end;
1294
1295 let replacement = if last_consumed < lines.len() - 1 || ctx.content.ends_with('\n') {
1296 format!("{reflowed_text}{line_ending}")
1297 } else {
1298 reflowed_text
1299 };
1300
1301 let original_text = &ctx.content[byte_range.clone()];
1302 let max_length = (footnote_start..=last_consumed)
1303 .map(|idx| self.calculate_effective_length(lines[idx]))
1304 .max()
1305 .unwrap_or(0);
1306 let line_limit = if config.line_length.is_unlimited() {
1307 usize::MAX
1308 } else {
1309 config.line_length.get()
1310 };
1311 if original_text != replacement && max_length > line_limit {
1312 warnings.push(LintWarning {
1313 rule_name: Some(self.name().to_string()),
1314 message: format!(
1315 "Line length {} exceeds {} characters",
1316 max_length,
1317 config.line_length.get()
1318 ),
1319 line: footnote_start + 1,
1320 column: 1,
1321 end_line: last_consumed + 1,
1322 end_column: lines[last_consumed].len() + 1,
1323 severity: Severity::Warning,
1324 fix: Some(crate::rule::Fix {
1325 range: byte_range,
1326 replacement,
1327 }),
1328 });
1329 }
1330 continue;
1331 }
1332
1333 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
1334 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
1335 // Skip admonition/tab marker lines — only reflow their indented content
1336 let current_line = lines[i];
1337 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
1338 i += 1;
1339 continue;
1340 }
1341
1342 let container_start = i;
1343
1344 // Detect the actual indent level from the first content line
1345 // (supports nested admonitions with 8+ spaces)
1346 let first_line = lines[i];
1347 let base_indent_len = first_line.len() - first_line.trim_start().len();
1348 let base_indent: String = " ".repeat(base_indent_len);
1349
1350 // Collect consecutive MkDocs container paragraph lines
1351 let mut container_lines: Vec<&str> = Vec::new();
1352 while i < lines.len() {
1353 let current_line_num = i + 1;
1354 let line_info = ctx.line_info(current_line_num);
1355
1356 // Stop if we leave the MkDocs container
1357 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
1358 break;
1359 }
1360
1361 let line = lines[i];
1362
1363 // Stop at paragraph boundaries within the container
1364 if line.trim().is_empty() {
1365 break;
1366 }
1367
1368 // Skip list items, code blocks, headings, HTML-only lines within containers
1369 if is_list_item(line.trim())
1370 || line.trim().starts_with("```")
1371 || line.trim().starts_with("~~~")
1372 || line.trim().starts_with('#')
1373 || is_html_only_line(line)
1374 {
1375 break;
1376 }
1377
1378 container_lines.push(line);
1379 i += 1;
1380 }
1381
1382 if container_lines.is_empty() {
1383 // Must advance i to avoid infinite loop when we encounter
1384 // non-paragraph content (code block, list, heading, empty line)
1385 // at the start of an MkDocs container
1386 i += 1;
1387 continue;
1388 }
1389
1390 // Strip the base indent from each line and join for reflow
1391 let stripped_lines: Vec<&str> = container_lines
1392 .iter()
1393 .map(|line| {
1394 if line.starts_with(&base_indent) {
1395 &line[base_indent_len..]
1396 } else {
1397 line.trim_start()
1398 }
1399 })
1400 .collect();
1401 let paragraph_text = stripped_lines.join(" ");
1402
1403 // Check if reflow is needed
1404 let needs_reflow = match config.reflow_mode {
1405 ReflowMode::Normalize => self.normalize_mode_needs_reflow(container_lines.iter().copied(), config),
1406 ReflowMode::SentencePerLine => {
1407 let sentences = split_into_sentences(¶graph_text);
1408 sentences.len() > 1 || container_lines.len() > 1
1409 }
1410 ReflowMode::SemanticLineBreaks => {
1411 let sentences = split_into_sentences(¶graph_text);
1412 sentences.len() > 1
1413 || container_lines.len() > 1
1414 || container_lines
1415 .iter()
1416 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1417 }
1418 ReflowMode::Default => container_lines
1419 .iter()
1420 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
1421 };
1422
1423 if !needs_reflow {
1424 continue;
1425 }
1426
1427 // Calculate byte range for this container paragraph
1428 let start_range = line_index.whole_line_range(container_start + 1);
1429 let end_line = container_start + container_lines.len() - 1;
1430 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1431 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1432 } else {
1433 line_index.whole_line_range(end_line + 1)
1434 };
1435 let byte_range = start_range.start..end_range.end;
1436
1437 // Reflow with adjusted line length (accounting for the 4-space indent)
1438 let reflow_line_length = if config.line_length.is_unlimited() {
1439 usize::MAX
1440 } else {
1441 config.line_length.get().saturating_sub(base_indent_len).max(1)
1442 };
1443 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1444 line_length: reflow_line_length,
1445 break_on_sentences: true,
1446 preserve_breaks: false,
1447 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1448 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1449 abbreviations: config.abbreviations_for_reflow(),
1450 length_mode: self.reflow_length_mode(),
1451 attr_lists: ctx.flavor.supports_attr_lists(),
1452 require_sentence_capital: config.require_sentence_capital,
1453 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
1454 Some(4)
1455 } else {
1456 None
1457 },
1458 };
1459 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1460
1461 // Re-add the 4-space indent to each reflowed line
1462 let reflowed_with_indent: Vec<String> =
1463 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
1464 let reflowed_text = reflowed_with_indent.join(line_ending);
1465
1466 // Preserve trailing newline
1467 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1468 format!("{reflowed_text}{line_ending}")
1469 } else {
1470 reflowed_text
1471 };
1472
1473 // Only generate a warning if the replacement is different
1474 let original_text = &ctx.content[byte_range.clone()];
1475 if original_text != replacement {
1476 warnings.push(LintWarning {
1477 rule_name: Some(self.name().to_string()),
1478 message: format!(
1479 "Line length {} exceeds {} characters (in MkDocs container)",
1480 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
1481 config.line_length.get()
1482 ),
1483 line: container_start + 1,
1484 column: 1,
1485 end_line: end_line + 1,
1486 end_column: lines[end_line].len() + 1,
1487 severity: Severity::Warning,
1488 fix: Some(crate::rule::Fix {
1489 range: byte_range,
1490 replacement,
1491 }),
1492 });
1493 }
1494 continue;
1495 }
1496
1497 // Helper function to detect semantic line markers
1498 let is_semantic_line = |content: &str| -> bool {
1499 let trimmed = content.trim_start();
1500 let semantic_markers = [
1501 "NOTE:",
1502 "WARNING:",
1503 "IMPORTANT:",
1504 "CAUTION:",
1505 "TIP:",
1506 "DANGER:",
1507 "HINT:",
1508 "INFO:",
1509 ];
1510 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
1511 };
1512
1513 // Helper function to detect fence markers (opening or closing)
1514 let is_fence_marker = |content: &str| -> bool {
1515 let trimmed = content.trim_start();
1516 trimmed.starts_with("```") || trimmed.starts_with("~~~")
1517 };
1518
1519 // Check if this is a list item - handle it specially
1520 let trimmed = lines[i].trim();
1521 if is_list_item(trimmed) {
1522 // Collect the entire list item including continuation lines
1523 let list_start = i;
1524 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
1525 let marker_len = marker.len();
1526
1527 // Checkbox ([ ]/[x]/[X]) is inline content, not part of the list marker.
1528 // Use the base bullet/number marker width for continuation recognition
1529 // so that continuation lines at 2+ spaces are collected for "- [ ] " items.
1530 let base_marker_len = if marker.contains("[ ] ") || marker.contains("[x] ") || marker.contains("[X] ") {
1531 marker.find('[').unwrap_or(marker_len)
1532 } else {
1533 marker_len
1534 };
1535
1536 // MkDocs flavor requires at least 4 spaces for list continuation
1537 // after a blank line (multi-paragraph list items). For non-blank
1538 // continuation (lines directly following the marker line), use
1539 // the natural marker width so that 2-space indent is recognized.
1540 let item_indent = ctx.lines[i].indent;
1541 let min_continuation_indent = if ctx.flavor.requires_strict_list_indent() {
1542 // Use 4-space relative indent from the list item's nesting level
1543 item_indent + (base_marker_len - item_indent).max(4)
1544 } else {
1545 marker_len
1546 };
1547 let content_continuation_indent = base_marker_len;
1548
1549 // Track lines and their types (content, code block, fence, nested list)
1550 #[derive(Clone)]
1551 enum LineType {
1552 Content(String),
1553 CodeBlock(String, usize), // content and original indent
1554 SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
1555 SnippetLine(String), // MkDocs Snippets delimiters (-8<-) that must stay on their own line
1556 DivMarker(String), // Quarto/Pandoc div markers (::: opening or closing)
1557 AdmonitionHeader(String, usize), // header text (e.g. "!!! note") and original indent
1558 AdmonitionContent(String, usize), // body content text and original indent
1559 Empty,
1560 }
1561
1562 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1563 i += 1;
1564
1565 // Collect continuation lines using ctx.lines for metadata
1566 while i < lines.len() {
1567 let line_info = &ctx.lines[i];
1568
1569 // Use pre-computed is_blank from ctx
1570 if line_info.is_blank {
1571 // Empty line - check if next line is indented (part of list item)
1572 if i + 1 < lines.len() {
1573 let next_info = &ctx.lines[i + 1];
1574
1575 // Check if next line is indented enough to be continuation
1576 if !next_info.is_blank && next_info.indent >= min_continuation_indent {
1577 // This blank line is between paragraphs/blocks in the list item
1578 list_item_lines.push(LineType::Empty);
1579 i += 1;
1580 continue;
1581 }
1582 }
1583 // No indented line after blank, end of list item
1584 break;
1585 }
1586
1587 // Use pre-computed indent from ctx
1588 let indent = line_info.indent;
1589
1590 // Valid continuation must be indented at least content_continuation_indent.
1591 // For non-blank continuation, use marker_len (e.g. 2 for "- ").
1592 // MkDocs strict 4-space requirement applies only after blank lines.
1593 if indent >= content_continuation_indent {
1594 let trimmed = line_info.content(ctx.content).trim();
1595
1596 // Check for MkDocs admonition lines inside list items BEFORE
1597 // checking in_code_block. Lines inside code blocks within
1598 // admonitions have both in_admonition and in_code_block set;
1599 // admonition membership takes priority so the entire admonition
1600 // structure (including embedded code blocks) is preserved.
1601 if line_info.in_admonition {
1602 let raw_content = line_info.content(ctx.content);
1603 if mkdocs_admonitions::is_admonition_start(raw_content) {
1604 let header_text = raw_content[indent..].trim_end().to_string();
1605 list_item_lines.push(LineType::AdmonitionHeader(header_text, indent));
1606 } else {
1607 let body_text = raw_content[indent..].trim_end().to_string();
1608 list_item_lines.push(LineType::AdmonitionContent(body_text, indent));
1609 }
1610 i += 1;
1611 continue;
1612 }
1613
1614 // Use pre-computed in_code_block from ctx
1615 if line_info.in_code_block {
1616 list_item_lines.push(LineType::CodeBlock(
1617 line_info.content(ctx.content)[indent..].to_string(),
1618 indent,
1619 ));
1620 i += 1;
1621 continue;
1622 }
1623
1624 // Check if this is a SIBLING list item (breaks parent)
1625 // Nested lists are indented >= marker_len and are PART of the parent item
1626 // Siblings are at indent < marker_len (at or before parent marker)
1627 if is_list_item(trimmed) && indent < marker_len {
1628 // This is a sibling item at same or higher level - end parent item
1629 break;
1630 }
1631
1632 // Nested list items are always processed independently
1633 // by the outer loop, so break when we encounter one.
1634 // If a blank line was collected before this, uncollect it
1635 // so the outer loop preserves the blank between parent and nested.
1636 if is_list_item(trimmed) && indent >= marker_len {
1637 if matches!(list_item_lines.last(), Some(LineType::Empty)) {
1638 list_item_lines.pop();
1639 i -= 1;
1640 }
1641 break;
1642 }
1643
1644 // Normal continuation vs indented code block.
1645 // Use min_continuation_indent for the threshold since
1646 // code blocks start 4 spaces beyond the expected content
1647 // level (which is min_continuation_indent for MkDocs).
1648 if indent <= min_continuation_indent + 3 {
1649 // Extract content (remove indentation and trailing whitespace)
1650 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1651 // See: https://github.com/rvben/rumdl/issues/76
1652 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1653
1654 // Check if this is a div marker (::: opening or closing)
1655 // These must be preserved on their own line, not merged into paragraphs
1656 if line_info.is_div_marker {
1657 list_item_lines.push(LineType::DivMarker(content));
1658 }
1659 // Check if this is a fence marker (opening or closing)
1660 // These should be treated as code block lines, not paragraph content
1661 else if is_fence_marker(&content) {
1662 list_item_lines.push(LineType::CodeBlock(content, indent));
1663 }
1664 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1665 else if is_semantic_line(&content) {
1666 list_item_lines.push(LineType::SemanticLine(content));
1667 }
1668 // Check if this is a snippet block delimiter (-8<- or --8<--)
1669 // These must be preserved on their own lines for MkDocs Snippets extension
1670 else if is_snippet_block_delimiter(&content) {
1671 list_item_lines.push(LineType::SnippetLine(content));
1672 } else {
1673 list_item_lines.push(LineType::Content(content));
1674 }
1675 i += 1;
1676 } else {
1677 // indent >= min_continuation_indent + 4: indented code block
1678 list_item_lines.push(LineType::CodeBlock(
1679 line_info.content(ctx.content)[indent..].to_string(),
1680 indent,
1681 ));
1682 i += 1;
1683 }
1684 } else {
1685 // Not indented enough, end of list item
1686 break;
1687 }
1688 }
1689
1690 // Determine the output continuation indent.
1691 // Normalize/Default modes canonicalize to min_continuation_indent
1692 // (fixing over-indented continuation). Semantic/SentencePerLine
1693 // modes preserve the user's actual indent since they only fix
1694 // line breaking, not indentation.
1695 let indent_size = match config.reflow_mode {
1696 ReflowMode::SemanticLineBreaks | ReflowMode::SentencePerLine => {
1697 // Find indent of the first plain text continuation line,
1698 // skipping the marker line (index 0), nested list items,
1699 // code blocks, and blank lines.
1700 list_item_lines
1701 .iter()
1702 .enumerate()
1703 .skip(1)
1704 .find_map(|(k, lt)| {
1705 if matches!(lt, LineType::Content(_)) {
1706 Some(ctx.lines[list_start + k].indent)
1707 } else {
1708 None
1709 }
1710 })
1711 .unwrap_or(min_continuation_indent)
1712 }
1713 _ => min_continuation_indent,
1714 };
1715 // For checkbox items in mkdocs flavor, enforce minimum indent so
1716 // continuation lines use the structural list indent (4), not the
1717 // content-aligned indent (6) which Python-Markdown doesn't support
1718 let has_checkbox = base_marker_len < marker_len;
1719 let indent_size = if has_checkbox && ctx.flavor.requires_strict_list_indent() {
1720 indent_size.max(min_continuation_indent)
1721 } else {
1722 indent_size
1723 };
1724 let expected_indent = " ".repeat(indent_size);
1725
1726 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
1727 #[derive(Clone)]
1728 enum Block {
1729 Paragraph(Vec<String>),
1730 Code {
1731 lines: Vec<(String, usize)>, // (content, indent) pairs
1732 has_preceding_blank: bool, // Whether there was a blank line before this block
1733 },
1734 SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
1735 SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
1736 DivMarker(String), // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
1737 Html {
1738 lines: Vec<String>, // HTML content preserved exactly as-is
1739 has_preceding_blank: bool, // Whether there was a blank line before this block
1740 },
1741 Admonition {
1742 header: String, // e.g. "!!! note" or "??? warning \"Title\""
1743 header_indent: usize, // original indent of the header line
1744 content_lines: Vec<(String, usize)>, // (text, original_indent) pairs for body lines
1745 },
1746 }
1747
// HTML tag detection helpers

/// Lowercase names of the HTML elements whose opening tag is treated as the
/// start of an HTML block. Any tag not listed here is not reported by
/// `is_block_html_opening_tag`.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];

/// Returns the lowercased tag name when `line` (ignoring surrounding
/// whitespace) begins with a block-level HTML opening tag.
///
/// * A line starting with `<!--` yields the sentinel name `"!--"`.
/// * Closing tags (`</…`) and other `<!…` constructs yield `None`.
/// * The tag name ends at the first whitespace, `>` or `/`, or at the end of
///   the line, so an opening tag whose attributes continue on the next line
///   (`<div` followed by a line break) is still recognised.
/// * Only names listed in `BLOCK_LEVEL_TAGS` are reported; matching is
///   case-insensitive.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let trimmed = line.trim();

    // HTML comments are tracked under a dedicated sentinel name.
    if trimmed.starts_with("<!--") {
        return Some("!--".to_string());
    }

    // Anything that does not start with `<` cannot be a tag.
    let after_bracket = trimmed.strip_prefix('<')?;

    // `</tag>` is a closing tag and `<!...>` is a declaration; neither opens a block.
    if after_bracket.starts_with('/') || after_bracket.starts_with('!') {
        return None;
    }

    // The name runs up to the first delimiter; a missing delimiter means the
    // name extends to the end of the line.
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let tag_name = after_bracket[..name_end].to_lowercase();

    // Only treat as block if it's a known block-level tag.
    BLOCK_LEVEL_TAGS
        .contains(&tag_name.as_str())
        .then_some(tag_name)
}
1817
/// Reports whether `line` (ignoring surrounding whitespace) closes the HTML
/// construct identified by `tag_name`.
///
/// * For the comment sentinel `"!--"`, the line must end with `-->`.
/// * Otherwise the line must start with `</`, optionally followed by
///   whitespace, then a tag name equal to `tag_name`. The comparison is
///   ASCII case-insensitive and covers the whole name, so `</DIV>` closes
///   `div` but `</pre>` does not close `p`.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    // Closing tags look like `</tagname>` or `</tagname ...>`.
    let Some(rest) = trimmed.strip_prefix("</") else {
        return false;
    };
    // Tolerate whitespace between `</` and the name.
    let rest = rest.trim_start();

    // The name ends at whitespace, at `>`, or at the end of the line.
    let name_end = rest
        .find(|c: char| c.is_whitespace() || c == '>')
        .unwrap_or(rest.len());

    rest[..name_end].eq_ignore_ascii_case(tag_name)
}
1831
/// Reports whether `line`, ignoring surrounding whitespace, ends with the
/// self-closing marker `/>`.
fn is_self_closing_tag(line: &str) -> bool {
    line.trim().strip_suffix("/>").is_some()
}
1836
1837 let mut blocks: Vec<Block> = Vec::new();
1838 let mut current_paragraph: Vec<String> = Vec::new();
1839 let mut current_code_block: Vec<(String, usize)> = Vec::new();
1840 let mut current_html_block: Vec<String> = Vec::new();
1841 let mut html_tag_stack: Vec<String> = Vec::new();
1842 let mut in_code = false;
1843 let mut in_html_block = false;
1844 let mut had_preceding_blank = false; // Track if we just saw an empty line
1845 let mut code_block_has_preceding_blank = false; // Track blank before current code block
1846 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
1847
1848 // Track admonition context for block building
1849 let mut in_admonition_block = false;
1850 let mut admonition_header: Option<(String, usize)> = None; // (header_text, indent)
1851 let mut admonition_content: Vec<(String, usize)> = Vec::new();
1852
1853 // Flush any pending admonition block into `blocks`
1854 let flush_admonition = |blocks: &mut Vec<Block>,
1855 in_admonition: &mut bool,
1856 header: &mut Option<(String, usize)>,
1857 content: &mut Vec<(String, usize)>| {
1858 if *in_admonition {
1859 if let Some((h, hi)) = header.take() {
1860 blocks.push(Block::Admonition {
1861 header: h,
1862 header_indent: hi,
1863 content_lines: std::mem::take(content),
1864 });
1865 }
1866 *in_admonition = false;
1867 }
1868 };
1869
1870 for line in &list_item_lines {
1871 match line {
1872 LineType::Empty => {
1873 if in_admonition_block {
1874 // Blank lines inside admonitions separate paragraphs within the body
1875 admonition_content.push((String::new(), 0));
1876 } else if in_code {
1877 current_code_block.push((String::new(), 0));
1878 } else if in_html_block {
1879 // Allow blank lines inside HTML blocks
1880 current_html_block.push(String::new());
1881 } else if !current_paragraph.is_empty() {
1882 blocks.push(Block::Paragraph(current_paragraph.clone()));
1883 current_paragraph.clear();
1884 }
1885 // Mark that we saw a blank line
1886 had_preceding_blank = true;
1887 }
1888 LineType::Content(content) => {
1889 flush_admonition(
1890 &mut blocks,
1891 &mut in_admonition_block,
1892 &mut admonition_header,
1893 &mut admonition_content,
1894 );
1895 // Check if we're currently in an HTML block
1896 if in_html_block {
1897 current_html_block.push(content.clone());
1898
1899 // Check if this line closes any open HTML tags
1900 if let Some(last_tag) = html_tag_stack.last() {
1901 if is_html_closing_tag(content, last_tag) {
1902 html_tag_stack.pop();
1903
1904 // If stack is empty, HTML block is complete
1905 if html_tag_stack.is_empty() {
1906 blocks.push(Block::Html {
1907 lines: current_html_block.clone(),
1908 has_preceding_blank: html_block_has_preceding_blank,
1909 });
1910 current_html_block.clear();
1911 in_html_block = false;
1912 }
1913 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1914 // Nested opening tag within HTML block
1915 if !is_self_closing_tag(content) {
1916 html_tag_stack.push(new_tag);
1917 }
1918 }
1919 }
1920 had_preceding_blank = false;
1921 } else {
1922 // Not in HTML block - check if this line starts one
1923 if let Some(tag_name) = is_block_html_opening_tag(content) {
1924 // Flush current paragraph before starting HTML block
1925 if in_code {
1926 blocks.push(Block::Code {
1927 lines: current_code_block.clone(),
1928 has_preceding_blank: code_block_has_preceding_blank,
1929 });
1930 current_code_block.clear();
1931 in_code = false;
1932 } else if !current_paragraph.is_empty() {
1933 blocks.push(Block::Paragraph(current_paragraph.clone()));
1934 current_paragraph.clear();
1935 }
1936
1937 // Start new HTML block
1938 in_html_block = true;
1939 html_block_has_preceding_blank = had_preceding_blank;
1940 current_html_block.push(content.clone());
1941
1942 // Check if it's self-closing or needs a closing tag
1943 if is_self_closing_tag(content) {
1944 // Self-closing tag - complete the HTML block immediately
1945 blocks.push(Block::Html {
1946 lines: current_html_block.clone(),
1947 has_preceding_blank: html_block_has_preceding_blank,
1948 });
1949 current_html_block.clear();
1950 in_html_block = false;
1951 } else {
1952 // Regular opening tag - push to stack
1953 html_tag_stack.push(tag_name);
1954 }
1955 } else {
1956 // Regular content line - add to paragraph
1957 if in_code {
1958 // Switching from code to content
1959 blocks.push(Block::Code {
1960 lines: current_code_block.clone(),
1961 has_preceding_blank: code_block_has_preceding_blank,
1962 });
1963 current_code_block.clear();
1964 in_code = false;
1965 }
1966 current_paragraph.push(content.clone());
1967 }
1968 had_preceding_blank = false; // Reset after content
1969 }
1970 }
1971 LineType::CodeBlock(content, indent) => {
1972 flush_admonition(
1973 &mut blocks,
1974 &mut in_admonition_block,
1975 &mut admonition_header,
1976 &mut admonition_content,
1977 );
1978 if in_html_block {
1979 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1980 blocks.push(Block::Html {
1981 lines: current_html_block.clone(),
1982 has_preceding_blank: html_block_has_preceding_blank,
1983 });
1984 current_html_block.clear();
1985 html_tag_stack.clear();
1986 in_html_block = false;
1987 }
1988 if !in_code {
1989 // Switching from content to code
1990 if !current_paragraph.is_empty() {
1991 blocks.push(Block::Paragraph(current_paragraph.clone()));
1992 current_paragraph.clear();
1993 }
1994 in_code = true;
1995 // Record whether there was a blank line before this code block
1996 code_block_has_preceding_blank = had_preceding_blank;
1997 }
1998 current_code_block.push((content.clone(), *indent));
1999 had_preceding_blank = false; // Reset after code
2000 }
2001 LineType::SemanticLine(content) => {
2002 // Semantic lines are standalone - flush any current block and add as separate block
2003 flush_admonition(
2004 &mut blocks,
2005 &mut in_admonition_block,
2006 &mut admonition_header,
2007 &mut admonition_content,
2008 );
2009 if in_code {
2010 blocks.push(Block::Code {
2011 lines: current_code_block.clone(),
2012 has_preceding_blank: code_block_has_preceding_blank,
2013 });
2014 current_code_block.clear();
2015 in_code = false;
2016 } else if in_html_block {
2017 blocks.push(Block::Html {
2018 lines: current_html_block.clone(),
2019 has_preceding_blank: html_block_has_preceding_blank,
2020 });
2021 current_html_block.clear();
2022 html_tag_stack.clear();
2023 in_html_block = false;
2024 } else if !current_paragraph.is_empty() {
2025 blocks.push(Block::Paragraph(current_paragraph.clone()));
2026 current_paragraph.clear();
2027 }
2028 // Add semantic line as its own block
2029 blocks.push(Block::SemanticLine(content.clone()));
2030 had_preceding_blank = false; // Reset after semantic line
2031 }
2032 LineType::SnippetLine(content) => {
2033 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
2034 // Unlike semantic lines, snippet lines don't add extra blank lines around them
2035 flush_admonition(
2036 &mut blocks,
2037 &mut in_admonition_block,
2038 &mut admonition_header,
2039 &mut admonition_content,
2040 );
2041 if in_code {
2042 blocks.push(Block::Code {
2043 lines: current_code_block.clone(),
2044 has_preceding_blank: code_block_has_preceding_blank,
2045 });
2046 current_code_block.clear();
2047 in_code = false;
2048 } else if in_html_block {
2049 blocks.push(Block::Html {
2050 lines: current_html_block.clone(),
2051 has_preceding_blank: html_block_has_preceding_blank,
2052 });
2053 current_html_block.clear();
2054 html_tag_stack.clear();
2055 in_html_block = false;
2056 } else if !current_paragraph.is_empty() {
2057 blocks.push(Block::Paragraph(current_paragraph.clone()));
2058 current_paragraph.clear();
2059 }
2060 // Add snippet line as its own block
2061 blocks.push(Block::SnippetLine(content.clone()));
2062 had_preceding_blank = false;
2063 }
2064 LineType::DivMarker(content) => {
2065 // Div markers (::: opening or closing) are standalone structural delimiters
2066 // Flush any current block and add as separate block
2067 flush_admonition(
2068 &mut blocks,
2069 &mut in_admonition_block,
2070 &mut admonition_header,
2071 &mut admonition_content,
2072 );
2073 if in_code {
2074 blocks.push(Block::Code {
2075 lines: current_code_block.clone(),
2076 has_preceding_blank: code_block_has_preceding_blank,
2077 });
2078 current_code_block.clear();
2079 in_code = false;
2080 } else if in_html_block {
2081 blocks.push(Block::Html {
2082 lines: current_html_block.clone(),
2083 has_preceding_blank: html_block_has_preceding_blank,
2084 });
2085 current_html_block.clear();
2086 html_tag_stack.clear();
2087 in_html_block = false;
2088 } else if !current_paragraph.is_empty() {
2089 blocks.push(Block::Paragraph(current_paragraph.clone()));
2090 current_paragraph.clear();
2091 }
2092 blocks.push(Block::DivMarker(content.clone()));
2093 had_preceding_blank = false;
2094 }
2095 LineType::AdmonitionHeader(header_text, indent) => {
2096 flush_admonition(
2097 &mut blocks,
2098 &mut in_admonition_block,
2099 &mut admonition_header,
2100 &mut admonition_content,
2101 );
2102 // Flush other current blocks
2103 if in_code {
2104 blocks.push(Block::Code {
2105 lines: current_code_block.clone(),
2106 has_preceding_blank: code_block_has_preceding_blank,
2107 });
2108 current_code_block.clear();
2109 in_code = false;
2110 } else if in_html_block {
2111 blocks.push(Block::Html {
2112 lines: current_html_block.clone(),
2113 has_preceding_blank: html_block_has_preceding_blank,
2114 });
2115 current_html_block.clear();
2116 html_tag_stack.clear();
2117 in_html_block = false;
2118 } else if !current_paragraph.is_empty() {
2119 blocks.push(Block::Paragraph(current_paragraph.clone()));
2120 current_paragraph.clear();
2121 }
2122 // Start new admonition block
2123 in_admonition_block = true;
2124 admonition_header = Some((header_text.clone(), *indent));
2125 admonition_content.clear();
2126 had_preceding_blank = false;
2127 }
2128 LineType::AdmonitionContent(content, indent) => {
2129 if in_admonition_block {
2130 // Add to current admonition body
2131 admonition_content.push((content.clone(), *indent));
2132 } else {
2133 // Admonition content without a header should not happen,
2134 // but treat it as regular content to avoid data loss
2135 current_paragraph.push(content.clone());
2136 }
2137 had_preceding_blank = false;
2138 }
2139 }
2140 }
2141
2142 // Push all remaining pending blocks independently
2143 flush_admonition(
2144 &mut blocks,
2145 &mut in_admonition_block,
2146 &mut admonition_header,
2147 &mut admonition_content,
2148 );
2149 if in_code && !current_code_block.is_empty() {
2150 blocks.push(Block::Code {
2151 lines: current_code_block,
2152 has_preceding_blank: code_block_has_preceding_blank,
2153 });
2154 }
2155 if in_html_block && !current_html_block.is_empty() {
2156 blocks.push(Block::Html {
2157 lines: current_html_block,
2158 has_preceding_blank: html_block_has_preceding_blank,
2159 });
2160 }
2161 if !current_paragraph.is_empty() {
2162 blocks.push(Block::Paragraph(current_paragraph));
2163 }
2164
2165 // Helper: check if a line (raw source or stripped content) is exempt
2166 // from line-length checks. Link reference definitions are always exempt;
2167 // standalone link/image lines are exempt when strict mode is off.
2168 // Also checks content after stripping list markers, since list item
2169 // continuation lines may contain link ref defs.
2170 let is_exempt_line = |raw_line: &str| -> bool {
2171 let trimmed = raw_line.trim();
2172 // Link reference definitions: always exempt
2173 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
2174 return true;
2175 }
2176 // Also check after stripping list markers (for list item content)
2177 if is_list_item(trimmed) {
2178 let (_, content) = extract_list_marker_and_content(trimmed);
2179 let content_trimmed = content.trim();
2180 if content_trimmed.starts_with('[')
2181 && content_trimmed.contains("]:")
2182 && LINK_REF_PATTERN.is_match(content_trimmed)
2183 {
2184 return true;
2185 }
2186 }
2187 // Standalone link/image lines: exempt when not strict
2188 if !config.strict && is_standalone_link_or_image_line(raw_line) {
2189 return true;
2190 }
2191 // HTML-only lines: exempt when not strict
2192 if !config.strict && is_html_only_line(raw_line) {
2193 return true;
2194 }
2195 false
2196 };
2197
2198 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
2199 // Exclude link reference definitions and standalone link lines from content
2200 // so they don't pollute combined_content or trigger false reflow.
2201 let content_lines: Vec<String> = list_item_lines
2202 .iter()
2203 .filter_map(|line| {
2204 if let LineType::Content(s) = line {
2205 if is_exempt_line(s) {
2206 return None;
2207 }
2208 Some(s.clone())
2209 } else {
2210 None
2211 }
2212 })
2213 .collect();
2214
2215 // Check if we need to reflow this list item
2216 // We check the combined content to see if it exceeds length limits
2217 let combined_content = content_lines.join(" ").trim().to_string();
2218
2219 // Helper to check if we should reflow in normalize mode
2220 let should_normalize = || {
2221 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
2222 // DO normalize if it has plain text content that spans multiple lines
2223 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
2224 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
2225 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
2226 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
2227 let has_admonitions = blocks.iter().any(|b| matches!(b, Block::Admonition { .. }));
2228 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
2229
2230 // If we have structural blocks but no paragraphs, don't normalize
2231 if (has_code_blocks
2232 || has_semantic_lines
2233 || has_snippet_lines
2234 || has_div_markers
2235 || has_admonitions)
2236 && !has_paragraphs
2237 {
2238 return false;
2239 }
2240
2241 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
2242 if has_paragraphs {
2243 // Count only paragraphs that contain at least one non-exempt line.
2244 // Paragraphs consisting entirely of link ref defs or standalone links
2245 // should not trigger normalization.
2246 let paragraph_count = blocks
2247 .iter()
2248 .filter(|b| {
2249 if let Block::Paragraph(para_lines) = b {
2250 !para_lines.iter().all(|line| is_exempt_line(line))
2251 } else {
2252 false
2253 }
2254 })
2255 .count();
2256 if paragraph_count > 1 {
2257 // Multiple non-exempt paragraph blocks should be normalized
2258 return true;
2259 }
2260
2261 // Single paragraph block: normalize if it has multiple content lines
2262 if content_lines.len() > 1 {
2263 return true;
2264 }
2265 }
2266
2267 false
2268 };
2269
2270 let needs_reflow = match config.reflow_mode {
2271 ReflowMode::Normalize => {
2272 // Only reflow if:
2273 // 1. Any non-exempt paragraph, when joined, exceeds the limit, OR
2274 // 2. Any admonition content line exceeds the limit, OR
2275 // 3. The list item should be normalized (has multi-line plain text)
2276 let any_paragraph_exceeds = blocks.iter().any(|block| match block {
2277 Block::Paragraph(para_lines) => {
2278 if para_lines.iter().all(|line| is_exempt_line(line)) {
2279 return false;
2280 }
2281 let joined = para_lines.join(" ");
2282 let with_marker = format!("{}{}", " ".repeat(indent_size), joined.trim());
2283 self.calculate_effective_length(&with_marker) > config.line_length.get()
2284 }
2285 Block::Admonition {
2286 content_lines,
2287 header_indent,
2288 ..
2289 } => content_lines.iter().any(|(content, indent)| {
2290 if content.is_empty() {
2291 return false;
2292 }
2293 let with_indent = format!("{}{}", " ".repeat(*indent.max(header_indent)), content);
2294 self.calculate_effective_length(&with_indent) > config.line_length.get()
2295 }),
2296 _ => false,
2297 });
2298 if any_paragraph_exceeds {
2299 true
2300 } else {
2301 should_normalize()
2302 }
2303 }
2304 ReflowMode::SentencePerLine => {
2305 // Check if list item has multiple sentences
2306 let sentences = split_into_sentences(&combined_content);
2307 sentences.len() > 1
2308 }
2309 ReflowMode::SemanticLineBreaks => {
2310 let sentences = split_into_sentences(&combined_content);
2311 sentences.len() > 1
2312 || (list_start..i).any(|line_idx| {
2313 let line = lines[line_idx];
2314 let trimmed = line.trim();
2315 if trimmed.is_empty() || is_exempt_line(line) {
2316 return false;
2317 }
2318 self.calculate_effective_length(line) > config.line_length.get()
2319 })
2320 }
2321 ReflowMode::Default => {
2322 // In default mode, only reflow if any individual non-exempt line exceeds limit
2323 (list_start..i).any(|line_idx| {
2324 let line = lines[line_idx];
2325 let trimmed = line.trim();
2326 // Skip blank lines and exempt lines
2327 if trimmed.is_empty() || is_exempt_line(line) {
2328 return false;
2329 }
2330 self.calculate_effective_length(line) > config.line_length.get()
2331 })
2332 }
2333 };
2334
2335 if needs_reflow {
2336 let start_range = line_index.whole_line_range(list_start + 1);
2337 let end_line = i - 1;
2338 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2339 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2340 } else {
2341 line_index.whole_line_range(end_line + 1)
2342 };
2343 let byte_range = start_range.start..end_range.end;
2344
2345 // Reflow each block (paragraphs only, preserve code blocks)
2346 // When line_length = 0 (no limit), use a very large value for reflow
2347 let reflow_line_length = if config.line_length.is_unlimited() {
2348 usize::MAX
2349 } else {
2350 config.line_length.get().saturating_sub(indent_size).max(1)
2351 };
2352 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2353 line_length: reflow_line_length,
2354 break_on_sentences: true,
2355 preserve_breaks: false,
2356 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2357 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2358 abbreviations: config.abbreviations_for_reflow(),
2359 length_mode: self.reflow_length_mode(),
2360 attr_lists: ctx.flavor.supports_attr_lists(),
2361 require_sentence_capital: config.require_sentence_capital,
2362 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2363 Some(4)
2364 } else {
2365 None
2366 },
2367 };
2368
2369 let mut result: Vec<String> = Vec::new();
2370 let mut is_first_block = true;
2371
2372 for (block_idx, block) in blocks.iter().enumerate() {
2373 match block {
2374 Block::Paragraph(para_lines) => {
2375 // If every line in this paragraph is exempt (link ref defs,
2376 // standalone links), preserve the paragraph verbatim instead
2377 // of reflowing it. Reflowing would corrupt link ref defs.
2378 let all_exempt = para_lines.iter().all(|line| is_exempt_line(line));
2379
2380 if all_exempt {
2381 for (idx, line) in para_lines.iter().enumerate() {
2382 if is_first_block && idx == 0 {
2383 result.push(format!("{marker}{line}"));
2384 is_first_block = false;
2385 } else {
2386 result.push(format!("{expected_indent}{line}"));
2387 }
2388 }
2389 } else {
2390 // Split the paragraph into segments at hard break boundaries
2391 // Each segment can be reflowed independently
2392 let segments = split_into_segments(para_lines);
2393
2394 for (segment_idx, segment) in segments.iter().enumerate() {
2395 // Check if this segment ends with a hard break and what type
2396 let hard_break_type = segment.last().and_then(|line| {
2397 let line = line.strip_suffix('\r').unwrap_or(line);
2398 if line.ends_with('\\') {
2399 Some("\\")
2400 } else if line.ends_with(" ") {
2401 Some(" ")
2402 } else {
2403 None
2404 }
2405 });
2406
2407 // Join and reflow the segment (removing the hard break marker for processing)
2408 let segment_for_reflow: Vec<String> = segment
2409 .iter()
2410 .map(|line| {
2411 // Strip hard break marker (2 spaces or backslash) for reflow processing
2412 if line.ends_with('\\') {
2413 line[..line.len() - 1].trim_end().to_string()
2414 } else if line.ends_with(" ") {
2415 line[..line.len() - 2].trim_end().to_string()
2416 } else {
2417 line.clone()
2418 }
2419 })
2420 .collect();
2421
2422 let segment_text = segment_for_reflow.join(" ").trim().to_string();
2423 if !segment_text.is_empty() {
2424 let reflowed =
2425 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
2426
2427 if is_first_block && segment_idx == 0 {
2428 // First segment of first block starts with marker
2429 result.push(format!("{marker}{}", reflowed[0]));
2430 for line in reflowed.iter().skip(1) {
2431 result.push(format!("{expected_indent}{line}"));
2432 }
2433 is_first_block = false;
2434 } else {
2435 // Subsequent segments
2436 for line in reflowed {
2437 result.push(format!("{expected_indent}{line}"));
2438 }
2439 }
2440
2441 // If this segment had a hard break, add it back to the last line
2442 // Preserve the original hard break format (backslash or two spaces)
2443 if let Some(break_marker) = hard_break_type
2444 && let Some(last_line) = result.last_mut()
2445 {
2446 last_line.push_str(break_marker);
2447 }
2448 }
2449 }
2450 }
2451
2452 // Add blank line after paragraph block if there's a next block.
2453 // Check if next block is a code block that doesn't want a preceding blank.
2454 // Also don't add blank lines before snippet lines (they should stay tight).
2455 // Only add if not already ending with one (avoids double blanks).
2456 if block_idx < blocks.len() - 1 {
2457 let next_block = &blocks[block_idx + 1];
2458 let should_add_blank = match next_block {
2459 Block::Code {
2460 has_preceding_blank, ..
2461 } => *has_preceding_blank,
2462 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2463 _ => true, // For all other blocks, add blank line
2464 };
2465 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2466 {
2467 result.push(String::new());
2468 }
2469 }
2470 }
2471 Block::Code {
2472 lines: code_lines,
2473 has_preceding_blank: _,
2474 } => {
2475 // Preserve code blocks as-is with original indentation
2476 // NOTE: Blank line before code block is handled by the previous block
2477 // (see paragraph block's logic above)
2478
2479 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
2480 if is_first_block && idx == 0 {
2481 // First line of first block gets marker
2482 result.push(format!(
2483 "{marker}{}",
2484 " ".repeat(orig_indent - marker_len) + content
2485 ));
2486 is_first_block = false;
2487 } else if content.is_empty() {
2488 result.push(String::new());
2489 } else {
2490 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
2491 }
2492 }
2493 }
2494 Block::SemanticLine(content) => {
2495 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
2496 // Only add blank before if not already ending with one.
2497 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
2498 result.push(String::new());
2499 }
2500
2501 if is_first_block {
2502 // First block starts with marker
2503 result.push(format!("{marker}{content}"));
2504 is_first_block = false;
2505 } else {
2506 // Subsequent blocks use expected indent
2507 result.push(format!("{expected_indent}{content}"));
2508 }
2509
2510 // Add blank line after semantic line if there's a next block.
2511 // Only add if not already ending with one.
2512 if block_idx < blocks.len() - 1 {
2513 let next_block = &blocks[block_idx + 1];
2514 let should_add_blank = match next_block {
2515 Block::Code {
2516 has_preceding_blank, ..
2517 } => *has_preceding_blank,
2518 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2519 _ => true, // For all other blocks, add blank line
2520 };
2521 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2522 {
2523 result.push(String::new());
2524 }
2525 }
2526 }
2527 Block::SnippetLine(content) => {
2528 // Preserve snippet delimiters (-8<-) as-is on their own line
2529 // Unlike semantic lines, snippet lines don't add extra blank lines
2530 if is_first_block {
2531 // First block starts with marker
2532 result.push(format!("{marker}{content}"));
2533 is_first_block = false;
2534 } else {
2535 // Subsequent blocks use expected indent
2536 result.push(format!("{expected_indent}{content}"));
2537 }
2538 // No blank lines added before or after snippet delimiters
2539 }
2540 Block::DivMarker(content) => {
2541 // Preserve div markers (::: opening or closing) as-is on their own line
2542 if is_first_block {
2543 result.push(format!("{marker}{content}"));
2544 is_first_block = false;
2545 } else {
2546 result.push(format!("{expected_indent}{content}"));
2547 }
2548 }
2549 Block::Html {
2550 lines: html_lines,
2551 has_preceding_blank: _,
2552 } => {
2553 // Preserve HTML blocks exactly as-is with original indentation
2554 // NOTE: Blank line before HTML block is handled by the previous block
2555
2556 for (idx, line) in html_lines.iter().enumerate() {
2557 if is_first_block && idx == 0 {
2558 // First line of first block gets marker
2559 result.push(format!("{marker}{line}"));
2560 is_first_block = false;
2561 } else if line.is_empty() {
2562 // Preserve blank lines inside HTML blocks
2563 result.push(String::new());
2564 } else {
2565 // Preserve lines with their original content (already includes indentation)
2566 result.push(format!("{expected_indent}{line}"));
2567 }
2568 }
2569
2570 // Add blank line after HTML block if there's a next block.
2571 // Only add if not already ending with one (avoids double blanks
2572 // when the HTML block itself contained a trailing blank line).
2573 if block_idx < blocks.len() - 1 {
2574 let next_block = &blocks[block_idx + 1];
2575 let should_add_blank = match next_block {
2576 Block::Code {
2577 has_preceding_blank, ..
2578 } => *has_preceding_blank,
2579 Block::Html {
2580 has_preceding_blank, ..
2581 } => *has_preceding_blank,
2582 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2583 _ => true, // For all other blocks, add blank line
2584 };
2585 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2586 {
2587 result.push(String::new());
2588 }
2589 }
2590 }
2591 Block::Admonition {
2592 header,
2593 header_indent,
2594 content_lines: admon_lines,
2595 } => {
2596 // Reconstruct admonition block with header at original indent
2597 // and body content reflowed to fit within the line length limit
2598
2599 // Add blank line before admonition if not first block
2600 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
2601 result.push(String::new());
2602 }
2603
2604 // Output the header at its original indent
2605 let header_indent_str = " ".repeat(*header_indent);
2606 if is_first_block {
2607 result.push(format!(
2608 "{marker}{}",
2609 " ".repeat(header_indent.saturating_sub(marker_len)) + header
2610 ));
2611 is_first_block = false;
2612 } else {
2613 result.push(format!("{header_indent_str}{header}"));
2614 }
2615
2616 // Derive body indent from the first non-empty content line's
2617 // stored indent, falling back to header_indent + 4 for
2618 // empty-body admonitions
2619 let body_indent = admon_lines
2620 .iter()
2621 .find(|(content, _)| !content.is_empty())
2622 .map(|(_, indent)| *indent)
2623 .unwrap_or(header_indent + 4);
2624 let body_indent_str = " ".repeat(body_indent);
2625
// Segment body content into code blocks (verbatim) and
// text paragraphs (reflowable), separated by blank lines.
// Code lines store (content, orig_indent) to reconstruct
// internal indentation relative to body_indent.
enum AdmonSegment {
    /// Consecutive non-empty body lines outside a fence; on output they are
    /// joined with single spaces and reflowed.
    Text(Vec<String>),
    /// Lines of one fenced block, fence lines included, each paired with the
    /// indent stored for that line; emitted without reflowing.
    Code(Vec<(String, usize)>),
}
2634
2635 let mut segments: Vec<AdmonSegment> = Vec::new();
2636 let mut current_text: Vec<String> = Vec::new();
2637 let mut current_code: Vec<(String, usize)> = Vec::new();
2638 let mut in_admon_code = false;
2639 // Track the opening fence character so closing fences
2640 // must match (backticks close backticks, tildes close tildes)
2641 let mut fence_char: char = '`';
2642
2643 // Opening fences: ``` or ~~~ followed by optional info string
2644 let get_opening_fence = |s: &str| -> Option<(char, usize)> {
2645 let t = s.trim_start();
2646 if t.starts_with("```") {
2647 Some(('`', t.bytes().take_while(|&b| b == b'`').count()))
2648 } else if t.starts_with("~~~") {
2649 Some(('~', t.bytes().take_while(|&b| b == b'~').count()))
2650 } else {
2651 None
2652 }
2653 };
2654 // Closing fences: ONLY fence chars + optional trailing spaces
2655 let get_closing_fence = |s: &str| -> Option<(char, usize)> {
2656 let t = s.trim();
2657 if t.starts_with("```") && t.bytes().all(|b| b == b'`') {
2658 Some(('`', t.len()))
2659 } else if t.starts_with("~~~") && t.bytes().all(|b| b == b'~') {
2660 Some(('~', t.len()))
2661 } else {
2662 None
2663 }
2664 };
2665 let mut fence_len: usize = 3;
2666
2667 for (content, orig_indent) in admon_lines {
2668 if in_admon_code {
2669 // Closing fence must use the same character, be
2670 // at least as long, and have no info string
2671 if let Some((ch, len)) = get_closing_fence(content)
2672 && ch == fence_char
2673 && len >= fence_len
2674 {
2675 current_code.push((content.clone(), *orig_indent));
2676 in_admon_code = false;
2677 segments.push(AdmonSegment::Code(std::mem::take(&mut current_code)));
2678 continue;
2679 }
2680 current_code.push((content.clone(), *orig_indent));
2681 } else if let Some((ch, len)) = get_opening_fence(content) {
2682 if !current_text.is_empty() {
2683 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2684 }
2685 in_admon_code = true;
2686 fence_char = ch;
2687 fence_len = len;
2688 current_code.push((content.clone(), *orig_indent));
2689 } else if content.is_empty() {
2690 if !current_text.is_empty() {
2691 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2692 }
2693 } else {
2694 current_text.push(content.clone());
2695 }
2696 }
2697 if in_admon_code && !current_code.is_empty() {
2698 segments.push(AdmonSegment::Code(std::mem::take(&mut current_code)));
2699 }
2700 if !current_text.is_empty() {
2701 segments.push(AdmonSegment::Text(std::mem::take(&mut current_text)));
2702 }
2703
2704 // Build reflow options once for all text segments
2705 let admon_reflow_length = if config.line_length.is_unlimited() {
2706 usize::MAX
2707 } else {
2708 config.line_length.get().saturating_sub(body_indent).max(1)
2709 };
2710
2711 let admon_reflow_options = crate::utils::text_reflow::ReflowOptions {
2712 line_length: admon_reflow_length,
2713 break_on_sentences: true,
2714 preserve_breaks: false,
2715 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2716 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2717 abbreviations: config.abbreviations_for_reflow(),
2718 length_mode: self.reflow_length_mode(),
2719 attr_lists: ctx.flavor.supports_attr_lists(),
2720 require_sentence_capital: config.require_sentence_capital,
2721 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
2722 Some(4)
2723 } else {
2724 None
2725 },
2726 };
2727
2728 // Output each segment
2729 for segment in &segments {
2730 // Blank line before each segment (after the header or previous segment)
2731 result.push(String::new());
2732
2733 match segment {
2734 AdmonSegment::Code(lines) => {
2735 for (line, orig_indent) in lines {
2736 if line.is_empty() {
2737 // Preserve blank lines inside code blocks
2738 result.push(String::new());
2739 } else {
2740 // Reconstruct with body_indent + any extra
2741 // indentation the line had beyond body_indent
2742 let extra = orig_indent.saturating_sub(body_indent);
2743 let indent_str = " ".repeat(body_indent + extra);
2744 result.push(format!("{indent_str}{line}"));
2745 }
2746 }
2747 }
2748 AdmonSegment::Text(lines) => {
2749 let paragraph_text = lines.join(" ").trim().to_string();
2750 if paragraph_text.is_empty() {
2751 continue;
2752 }
2753 let reflowed = crate::utils::text_reflow::reflow_line(
2754 ¶graph_text,
2755 &admon_reflow_options,
2756 );
2757 for line in &reflowed {
2758 result.push(format!("{body_indent_str}{line}"));
2759 }
2760 }
2761 }
2762 }
2763
2764 // Add blank line after admonition if there's a next block
2765 if block_idx < blocks.len() - 1 {
2766 let next_block = &blocks[block_idx + 1];
2767 let should_add_blank = match next_block {
2768 Block::Code {
2769 has_preceding_blank, ..
2770 } => *has_preceding_blank,
2771 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2772 _ => true,
2773 };
2774 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2775 {
2776 result.push(String::new());
2777 }
2778 }
2779 }
2780 }
2781 }
2782
2783 let reflowed_text = result.join(line_ending);
2784
2785 // Preserve trailing newline
2786 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2787 format!("{reflowed_text}{line_ending}")
2788 } else {
2789 reflowed_text
2790 };
2791
2792 // Get the original text to compare
2793 let original_text = &ctx.content[byte_range.clone()];
2794
2795 // Only generate a warning if the replacement is different from the original
2796 if original_text != replacement {
2797 // Generate an appropriate message based on why reflow is needed
2798 let message = match config.reflow_mode {
2799 ReflowMode::SentencePerLine => {
2800 let num_sentences = split_into_sentences(&combined_content).len();
2801 let num_lines = content_lines.len();
2802 if num_lines == 1 {
2803 // Single line with multiple sentences
2804 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2805 } else {
2806 // Multiple lines - could be split sentences or mixed
2807 format!(
2808 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
2809 )
2810 }
2811 }
2812 ReflowMode::SemanticLineBreaks => {
2813 let num_sentences = split_into_sentences(&combined_content).len();
2814 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
2815 }
2816 ReflowMode::Normalize => {
2817 // Find the longest non-exempt paragraph when joined
2818 let max_para_length = blocks
2819 .iter()
2820 .filter_map(|block| {
2821 if let Block::Paragraph(para_lines) = block {
2822 if para_lines.iter().all(|line| is_exempt_line(line)) {
2823 return None;
2824 }
2825 let joined = para_lines.join(" ");
2826 let with_indent = format!("{}{}", " ".repeat(indent_size), joined.trim());
2827 Some(self.calculate_effective_length(&with_indent))
2828 } else {
2829 None
2830 }
2831 })
2832 .max()
2833 .unwrap_or(0);
2834 if max_para_length > config.line_length.get() {
2835 format!(
2836 "Line length {} exceeds {} characters",
2837 max_para_length,
2838 config.line_length.get()
2839 )
2840 } else {
2841 "Multi-line content can be normalized".to_string()
2842 }
2843 }
2844 ReflowMode::Default => {
2845 // Report the actual longest non-exempt line, not the combined content
2846 let max_length = (list_start..i)
2847 .filter(|&line_idx| {
2848 let line = lines[line_idx];
2849 let trimmed = line.trim();
2850 !trimmed.is_empty() && !is_exempt_line(line)
2851 })
2852 .map(|line_idx| self.calculate_effective_length(lines[line_idx]))
2853 .max()
2854 .unwrap_or(0);
2855 format!(
2856 "Line length {} exceeds {} characters",
2857 max_length,
2858 config.line_length.get()
2859 )
2860 }
2861 };
2862
2863 warnings.push(LintWarning {
2864 rule_name: Some(self.name().to_string()),
2865 message,
2866 line: list_start + 1,
2867 column: 1,
2868 end_line: end_line + 1,
2869 end_column: lines[end_line].len() + 1,
2870 severity: Severity::Warning,
2871 fix: Some(crate::rule::Fix {
2872 range: byte_range,
2873 replacement,
2874 }),
2875 });
2876 }
2877 }
2878 continue;
2879 }
2880
2881 // Found start of a paragraph - collect all lines in it
2882 let paragraph_start = i;
2883 let mut paragraph_lines = vec![lines[i]];
2884 i += 1;
2885
2886 while i < lines.len() {
2887 let next_line = lines[i];
2888 let next_line_num = i + 1;
2889 let next_trimmed = next_line.trim();
2890
2891 // Stop at paragraph boundaries
2892 if next_trimmed.is_empty()
2893 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
2894 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
2895 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
2896 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
2897 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
2898 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
2899 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_block)
2900 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
2901 || ctx
2902 .line_info(next_line_num)
2903 .is_some_and(|info| info.in_mkdocs_container())
2904 || (next_line_num > 0
2905 && next_line_num <= ctx.lines.len()
2906 && ctx.lines[next_line_num - 1].blockquote.is_some())
2907 || next_trimmed.starts_with('#')
2908 || TableUtils::is_potential_table_row(next_line)
2909 || is_list_item(next_trimmed)
2910 || is_horizontal_rule(next_trimmed)
2911 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
2912 || is_template_directive_only(next_line)
2913 || is_standalone_attr_list(next_line)
2914 || is_snippet_block_delimiter(next_line)
2915 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
2916 || is_html_only_line(next_line)
2917 {
2918 break;
2919 }
2920
2921 // Check if the previous line ends with a hard break (2+ spaces or backslash)
2922 if i > 0 && has_hard_break(lines[i - 1]) {
2923 // Don't include lines after hard breaks in the same paragraph
2924 break;
2925 }
2926
2927 paragraph_lines.push(next_line);
2928 i += 1;
2929 }
2930
2931 // Compute the common leading indent of all non-empty paragraph lines,
2932 // but only when those lines are structurally inside a list block.
2933 // Indented continuation lines that follow a nested list arrive here
2934 // with their structural indentation intact (e.g. 2 spaces for a
2935 // top-level list item). Stripping the indent before reflow and
2936 // re-applying it afterward prevents the fixer from moving those
2937 // lines to column 0.
2938 //
2939 // The list-block guard is essential: top-level paragraphs that happen
2940 // to start with spaces (insignificant in Markdown) must NOT have those
2941 // spaces preserved or injected by the fixer.
2942 let common_indent: String = if ctx.is_in_list_block(paragraph_start + 1) {
2943 let min_len = paragraph_lines
2944 .iter()
2945 .filter(|l| !l.trim().is_empty())
2946 .map(|l| l.len() - l.trim_start().len())
2947 .min()
2948 .unwrap_or(0);
2949 paragraph_lines
2950 .iter()
2951 .find(|l| !l.trim().is_empty())
2952 .map(|l| l[..min_len].to_string())
2953 .unwrap_or_default()
2954 } else {
2955 String::new()
2956 };
2957
2958 // Combine paragraph lines into a single string for processing.
2959 // This must be done BEFORE the needs_reflow check for sentence-per-line mode.
2960 let paragraph_text = if common_indent.is_empty() {
2961 paragraph_lines.join(" ")
2962 } else {
2963 paragraph_lines
2964 .iter()
2965 .map(|l| {
2966 if l.starts_with(common_indent.as_str()) {
2967 &l[common_indent.len()..]
2968 } else {
2969 l.trim_start()
2970 }
2971 })
2972 .collect::<Vec<_>>()
2973 .join(" ")
2974 };
2975
2976 // Skip reflowing if this paragraph contains definition list items
2977 // Definition lists are multi-line structures that should not be joined
2978 let contains_definition_list = paragraph_lines
2979 .iter()
2980 .any(|line| crate::utils::is_definition_list_item(line));
2981
2982 if contains_definition_list {
2983 // Don't reflow definition lists - skip this paragraph
2984 i = paragraph_start + paragraph_lines.len();
2985 continue;
2986 }
2987
2988 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2989 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2990 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2991
2992 if contains_snippets {
2993 // Don't reflow Snippets blocks - skip this paragraph
2994 i = paragraph_start + paragraph_lines.len();
2995 continue;
2996 }
2997
2998 // Check if this paragraph needs reflowing
2999 let needs_reflow = match config.reflow_mode {
3000 ReflowMode::Normalize => self.normalize_mode_needs_reflow(paragraph_lines.iter().copied(), config),
3001 ReflowMode::SentencePerLine => {
3002 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
3003 // Note: we check the joined text because sentences can span multiple lines
3004 let sentences = split_into_sentences(¶graph_text);
3005
3006 // Always reflow if multiple sentences on one line
3007 if sentences.len() > 1 {
3008 true
3009 } else if paragraph_lines.len() > 1 {
3010 // For single-sentence paragraphs spanning multiple lines:
3011 // Reflow if they COULD fit on one line (respecting line-length constraint)
3012 if config.line_length.is_unlimited() {
3013 // No line-length constraint - always join single sentences
3014 true
3015 } else {
3016 // Only join if it fits within line-length.
3017 // paragraph_text has the common indent stripped, so add it
3018 // back to get the true output length before comparing.
3019 let effective_length =
3020 self.calculate_effective_length(¶graph_text) + common_indent.len();
3021 effective_length <= config.line_length.get()
3022 }
3023 } else {
3024 false
3025 }
3026 }
3027 ReflowMode::SemanticLineBreaks => {
3028 let sentences = split_into_sentences(¶graph_text);
3029 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
3030 sentences.len() > 1
3031 || paragraph_lines.len() > 1
3032 || paragraph_lines
3033 .iter()
3034 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
3035 }
3036 ReflowMode::Default => {
3037 // In default mode, only reflow if lines exceed limit
3038 paragraph_lines
3039 .iter()
3040 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
3041 }
3042 };
3043
3044 if needs_reflow {
3045 // Calculate byte range for this paragraph
3046 // Use whole_line_range for each line and combine
3047 let start_range = line_index.whole_line_range(paragraph_start + 1);
3048 let end_line = paragraph_start + paragraph_lines.len() - 1;
3049
3050 // For the last line, we want to preserve any trailing newline
3051 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
3052 // Last line without trailing newline - use line_text_range
3053 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
3054 } else {
3055 // Not the last line or has trailing newline - use whole_line_range
3056 line_index.whole_line_range(end_line + 1)
3057 };
3058
3059 let byte_range = start_range.start..end_range.end;
3060
3061 // Check if the paragraph ends with a hard break and what type
3062 let hard_break_type = paragraph_lines.last().and_then(|line| {
3063 let line = line.strip_suffix('\r').unwrap_or(line);
3064 if line.ends_with('\\') {
3065 Some("\\")
3066 } else if line.ends_with(" ") {
3067 Some(" ")
3068 } else {
3069 None
3070 }
3071 });
3072
3073 // Reflow the paragraph
3074 // When line_length = 0 (no limit), use a very large value for reflow
3075 let reflow_line_length = if config.line_length.is_unlimited() {
3076 usize::MAX
3077 } else {
3078 config.line_length.get()
3079 };
3080 let reflow_options = crate::utils::text_reflow::ReflowOptions {
3081 line_length: reflow_line_length,
3082 break_on_sentences: true,
3083 preserve_breaks: false,
3084 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
3085 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
3086 abbreviations: config.abbreviations_for_reflow(),
3087 length_mode: self.reflow_length_mode(),
3088 attr_lists: ctx.flavor.supports_attr_lists(),
3089 require_sentence_capital: config.require_sentence_capital,
3090 max_list_continuation_indent: if ctx.flavor.requires_strict_list_indent() {
3091 Some(4)
3092 } else {
3093 None
3094 },
3095 };
3096 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
3097
3098 // Re-apply the common indent to each non-empty reflowed line so
3099 // that the replacement preserves the original structural indentation.
3100 if !common_indent.is_empty() {
3101 for line in &mut reflowed {
3102 if !line.is_empty() {
3103 *line = format!("{common_indent}{line}");
3104 }
3105 }
3106 }
3107
3108 // If the original paragraph ended with a hard break, preserve it
3109 // Preserve the original hard break format (backslash or two spaces)
3110 if let Some(break_marker) = hard_break_type
3111 && !reflowed.is_empty()
3112 {
3113 let last_idx = reflowed.len() - 1;
3114 if !has_hard_break(&reflowed[last_idx]) {
3115 reflowed[last_idx].push_str(break_marker);
3116 }
3117 }
3118
3119 let reflowed_text = reflowed.join(line_ending);
3120
3121 // Preserve trailing newline if the original paragraph had one
3122 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
3123 format!("{reflowed_text}{line_ending}")
3124 } else {
3125 reflowed_text
3126 };
3127
3128 // Get the original text to compare
3129 let original_text = &ctx.content[byte_range.clone()];
3130
3131 // Only generate a warning if the replacement is different from the original
3132 if original_text != replacement {
3133 // Create warning with actual fix
3134 // In default mode, report the specific line that violates
3135 // In normalize mode, report the whole paragraph
3136 // In sentence-per-line mode, report the entire paragraph
3137 let (warning_line, warning_end_line) = match config.reflow_mode {
3138 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
3139 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
3140 // Highlight the entire paragraph that needs reformatting
3141 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
3142 }
3143 ReflowMode::Default => {
3144 // Find the first line that exceeds the limit
3145 let mut violating_line = paragraph_start;
3146 for (idx, line) in paragraph_lines.iter().enumerate() {
3147 if self.calculate_effective_length(line) > config.line_length.get() {
3148 violating_line = paragraph_start + idx;
3149 break;
3150 }
3151 }
3152 (violating_line + 1, violating_line + 1)
3153 }
3154 };
3155
3156 warnings.push(LintWarning {
3157 rule_name: Some(self.name().to_string()),
3158 message: match config.reflow_mode {
3159 ReflowMode::Normalize => format!(
3160 "Paragraph could be normalized to use line length of {} characters",
3161 config.line_length.get()
3162 ),
3163 ReflowMode::SentencePerLine => {
3164 let num_sentences = split_into_sentences(¶graph_text).len();
3165 if paragraph_lines.len() == 1 {
3166 // Single line with multiple sentences
3167 format!("Line contains {num_sentences} sentences (one sentence per line required)")
3168 } else {
3169 let num_lines = paragraph_lines.len();
3170 // Multiple lines - could be split sentences or mixed
3171 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
3172 }
3173 },
3174 ReflowMode::SemanticLineBreaks => {
3175 let num_sentences = split_into_sentences(¶graph_text).len();
3176 format!(
3177 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
3178 )
3179 },
3180 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
3181 },
3182 line: warning_line,
3183 column: 1,
3184 end_line: warning_end_line,
3185 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
3186 severity: Severity::Warning,
3187 fix: Some(crate::rule::Fix {
3188 range: byte_range,
3189 replacement,
3190 }),
3191 });
3192 }
3193 }
3194 }
3195
3196 warnings
3197 }
3198
3199 /// Calculate string length based on the configured length mode
3200 fn calculate_string_length(&self, s: &str) -> usize {
3201 match self.config.length_mode {
3202 LengthMode::Chars => s.chars().count(),
3203 LengthMode::Visual => s.width(),
3204 LengthMode::Bytes => s.len(),
3205 }
3206 }
3207
3208 /// Calculate effective line length
3209 ///
3210 /// Returns the actual display length of the line using the configured length mode.
3211 fn calculate_effective_length(&self, line: &str) -> usize {
3212 self.calculate_string_length(line)
3213 }
3214
3215 /// Calculate line length with inline link/image URLs removed.
3216 ///
3217 /// For each inline link `[text](url)` or image `` on the line,
3218 /// computes the "savings" from removing the URL portion (keeping only `[text]`
3219 /// or `![alt]`). Returns `effective_length - total_savings`.
3220 ///
3221 /// Handles nested constructs (e.g., `[](url)`) by only counting the
3222 /// outermost construct to avoid double-counting.
3223 fn calculate_text_only_length(
3224 &self,
3225 effective_length: usize,
3226 line_number: usize,
3227 ctx: &crate::lint_context::LintContext,
3228 ) -> usize {
3229 let line_range = ctx.line_index.line_content_range(line_number);
3230 let line_byte_end = line_range.end;
3231
3232 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
3233 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
3234
3235 // Binary search: links are sorted by byte_offset, so link.line is non-decreasing
3236 let link_start = ctx.links.partition_point(|l| l.line < line_number);
3237 for link in &ctx.links[link_start..] {
3238 if link.line != line_number {
3239 break;
3240 }
3241 if link.is_reference {
3242 continue;
3243 }
3244 if !matches!(link.link_type, LinkType::Inline) {
3245 continue;
3246 }
3247 if link.byte_end > line_byte_end {
3248 continue;
3249 }
3250 let text_only_len = 2 + self.calculate_string_length(&link.text);
3251 constructs.push((link.byte_offset, link.byte_end, text_only_len));
3252 }
3253
3254 let img_start = ctx.images.partition_point(|i| i.line < line_number);
3255 for image in &ctx.images[img_start..] {
3256 if image.line != line_number {
3257 break;
3258 }
3259 if image.is_reference {
3260 continue;
3261 }
3262 if !matches!(image.link_type, LinkType::Inline) {
3263 continue;
3264 }
3265 if image.byte_end > line_byte_end {
3266 continue;
3267 }
3268 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
3269 constructs.push((image.byte_offset, image.byte_end, text_only_len));
3270 }
3271
3272 if constructs.is_empty() {
3273 return effective_length;
3274 }
3275
3276 // Sort by byte offset to handle overlapping/nested constructs
3277 constructs.sort_by_key(|&(start, _, _)| start);
3278
3279 let mut total_savings: usize = 0;
3280 let mut last_end: usize = 0;
3281
3282 for (start, end, text_only_len) in &constructs {
3283 // Skip constructs nested inside a previously counted one
3284 if *start < last_end {
3285 continue;
3286 }
3287 // Full construct length in configured length mode
3288 let full_source = &ctx.content[*start..*end];
3289 let full_len = self.calculate_string_length(full_source);
3290 total_savings += full_len.saturating_sub(*text_only_len);
3291 last_end = *end;
3292 }
3293
3294 effective_length.saturating_sub(total_savings)
3295 }
3296}