rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
7use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
8use crate::utils::range_utils::LineIndex;
9use crate::utils::range_utils::calculate_excess_range;
10use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19 split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
28#[derive(Clone, Default)]
29pub struct MD013LineLength {
30 pub(crate) config: MD013Config,
31}
32
33impl MD013LineLength {
34 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35 Self {
36 config: MD013Config {
37 line_length: crate::types::LineLength::new(line_length),
38 code_blocks,
39 tables,
40 headings,
41 paragraphs: true, // Default to true for backwards compatibility
42 strict,
43 reflow: false,
44 reflow_mode: ReflowMode::default(),
45 length_mode: LengthMode::default(),
46 abbreviations: Vec::new(),
47 },
48 }
49 }
50
51 pub fn from_config_struct(config: MD013Config) -> Self {
52 Self { config }
53 }
54
55 fn should_ignore_line(
56 &self,
57 line: &str,
58 _lines: &[&str],
59 current_line: usize,
60 ctx: &crate::lint_context::LintContext,
61 ) -> bool {
62 if self.config.strict {
63 return false;
64 }
65
66 // Quick check for common patterns before expensive regex
67 let trimmed = line.trim();
68
69 // Only skip if the entire line is a URL (quick check first)
70 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71 return true;
72 }
73
74 // Only skip if the entire line is an image reference (quick check first)
75 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76 return true;
77 }
78
79 // Only skip if the entire line is a link reference (quick check first)
80 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81 return true;
82 }
83
84 // Code blocks with long strings (only check if in code block)
85 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86 && !trimmed.is_empty()
87 && !line.contains(' ')
88 && !line.contains('\t')
89 {
90 return true;
91 }
92
93 false
94 }
95
96 /// Check if rule should skip based on provided config (used for inline config support)
97 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
98 // Skip if content is empty
99 if ctx.content.is_empty() {
100 return true;
101 }
102
103 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
104 if config.reflow
105 && (config.reflow_mode == ReflowMode::SentencePerLine
106 || config.reflow_mode == ReflowMode::SemanticLineBreaks
107 || config.reflow_mode == ReflowMode::Normalize)
108 {
109 return false;
110 }
111
112 // Quick check: if total content is shorter than line limit, definitely skip
113 if ctx.content.len() <= config.line_length.get() {
114 return true;
115 }
116
117 // Skip if no line exceeds the limit
118 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
119 }
120}
121
122impl Rule for MD013LineLength {
123 fn name(&self) -> &'static str {
124 "MD013"
125 }
126
127 fn description(&self) -> &'static str {
128 "Line length should not be excessive"
129 }
130
131 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
132 // Use pre-parsed inline config from LintContext
133 let config_override = ctx.inline_config().get_rule_config("MD013");
134
135 // Apply configuration override if present
136 let effective_config = if let Some(json_config) = config_override {
137 if let Some(obj) = json_config.as_object() {
138 let mut config = self.config.clone();
139 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
140 config.line_length = crate::types::LineLength::new(line_length as usize);
141 }
142 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
143 config.code_blocks = code_blocks;
144 }
145 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
146 config.tables = tables;
147 }
148 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
149 config.headings = headings;
150 }
151 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
152 config.strict = strict;
153 }
154 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
155 config.reflow = reflow;
156 }
157 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
158 config.reflow_mode = match reflow_mode {
159 "default" => ReflowMode::Default,
160 "normalize" => ReflowMode::Normalize,
161 "sentence-per-line" => ReflowMode::SentencePerLine,
162 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
163 _ => ReflowMode::default(),
164 };
165 }
166 config
167 } else {
168 self.config.clone()
169 }
170 } else {
171 self.config.clone()
172 };
173
174 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
175 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
176 if self.should_skip_with_config(ctx, &effective_config)
177 && !(effective_config.reflow
178 && (effective_config.reflow_mode == ReflowMode::Normalize
179 || effective_config.reflow_mode == ReflowMode::SentencePerLine
180 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
181 {
182 return Ok(Vec::new());
183 }
184
185 // Direct implementation without DocumentStructure
186 let mut warnings = Vec::new();
187
188 // Special handling: line_length = 0 means "no line length limit"
189 // Skip all line length checks, but still allow reflow if enabled
190 let skip_length_checks = effective_config.line_length.is_unlimited();
191
192 // Pre-filter lines that could be problematic to avoid processing all lines
193 let mut candidate_lines = Vec::new();
194 if !skip_length_checks {
195 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
196 // Skip front matter - it should never be linted
197 if line_info.in_front_matter {
198 continue;
199 }
200
201 // Quick length check first
202 if line_info.byte_len > effective_config.line_length.get() {
203 candidate_lines.push(line_idx);
204 }
205 }
206 }
207
208 // If no candidate lines and not in normalize or sentence-per-line mode, early return
209 if candidate_lines.is_empty()
210 && !(effective_config.reflow
211 && (effective_config.reflow_mode == ReflowMode::Normalize
212 || effective_config.reflow_mode == ReflowMode::SentencePerLine
213 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
214 {
215 return Ok(warnings);
216 }
217
218 let lines = ctx.raw_lines();
219
220 // Create a quick lookup set for heading lines
221 // We need this for both the heading skip check AND the paragraphs check
222 let heading_lines_set: std::collections::HashSet<usize> = ctx
223 .lines
224 .iter()
225 .enumerate()
226 .filter(|(_, line)| line.heading.is_some())
227 .map(|(idx, _)| idx + 1)
228 .collect();
229
230 // Use pre-computed table blocks from context
231 // We need this for both the table skip check AND the paragraphs check
232 let table_blocks = &ctx.table_blocks;
233 let mut table_lines_set = std::collections::HashSet::new();
234 for table in table_blocks {
235 table_lines_set.insert(table.header_line + 1);
236 table_lines_set.insert(table.delimiter_line + 1);
237 for &line in &table.content_lines {
238 table_lines_set.insert(line + 1);
239 }
240 }
241
242 // Process candidate lines for line length checks
243 for &line_idx in &candidate_lines {
244 let line_number = line_idx + 1;
245 let line = lines[line_idx];
246
247 // Calculate actual line length
248 let effective_length = self.calculate_effective_length(line);
249
250 // Use single line length limit for all content
251 let line_limit = effective_config.line_length.get();
252
253 // Skip short lines immediately
254 if effective_length <= line_limit {
255 continue;
256 }
257
258 // Skip mkdocstrings blocks (already handled by LintContext)
259 if ctx.lines[line_idx].in_mkdocstrings {
260 continue;
261 }
262
263 // Skip various block types efficiently
264 if !effective_config.strict {
265 // Skip setext heading underlines
266 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
267 continue;
268 }
269
270 // Skip block elements according to config flags
271 // The flags mean: true = check these elements, false = skip these elements
272 // So we skip when the flag is FALSE and the line is in that element type
273 if (!effective_config.headings && heading_lines_set.contains(&line_number))
274 || (!effective_config.code_blocks
275 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
276 || (!effective_config.tables && table_lines_set.contains(&line_number))
277 || ctx.lines[line_number - 1].blockquote.is_some()
278 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
279 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
280 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
281 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
282 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
283 {
284 continue;
285 }
286
287 // Check if this is a paragraph/regular text line
288 // If paragraphs = false, skip lines that are NOT in special blocks
289 if !effective_config.paragraphs {
290 let is_special_block = heading_lines_set.contains(&line_number)
291 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
292 || table_lines_set.contains(&line_number)
293 || ctx.lines[line_number - 1].blockquote.is_some()
294 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
295 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
296 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
297 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
298 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
299 || ctx
300 .line_info(line_number)
301 .is_some_and(|info| info.in_mkdocs_container());
302
303 // Skip regular paragraph text when paragraphs = false
304 if !is_special_block {
305 continue;
306 }
307 }
308
309 // Skip lines that are only a URL, image ref, or link ref
310 if self.should_ignore_line(line, lines, line_idx, ctx) {
311 continue;
312 }
313 }
314
315 // In sentence-per-line mode, check if this is a single long sentence
316 // If so, emit a warning without a fix (user must manually rephrase)
317 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
318 let sentences = split_into_sentences(line.trim());
319 if sentences.len() == 1 {
320 // Single sentence that's too long - warn but don't auto-fix
321 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
322
323 let (start_line, start_col, end_line, end_col) =
324 calculate_excess_range(line_number, line, line_limit);
325
326 warnings.push(LintWarning {
327 rule_name: Some(self.name().to_string()),
328 message,
329 line: start_line,
330 column: start_col,
331 end_line,
332 end_column: end_col,
333 severity: Severity::Warning,
334 fix: None, // No auto-fix for long single sentences
335 });
336 continue;
337 }
338 // Multiple sentences will be handled by paragraph-based reflow
339 continue;
340 }
341
342 // In semantic-line-breaks mode, skip per-line checks —
343 // all reflow is handled at the paragraph level with cascading splits
344 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
345 continue;
346 }
347
348 // Don't provide fix for individual lines when reflow is enabled
349 // Paragraph-based fixes will be handled separately
350 let fix = None;
351
352 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
353
354 // Calculate precise character range for the excess portion
355 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
356
357 warnings.push(LintWarning {
358 rule_name: Some(self.name().to_string()),
359 message,
360 line: start_line,
361 column: start_col,
362 end_line,
363 end_column: end_col,
364 severity: Severity::Warning,
365 fix,
366 });
367 }
368
369 // If reflow is enabled, generate paragraph-based fixes
370 if effective_config.reflow {
371 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
372 // Merge paragraph warnings with line warnings, removing duplicates
373 for pw in paragraph_warnings {
374 // Remove any line warnings that overlap with this paragraph
375 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
376 warnings.push(pw);
377 }
378 }
379
380 Ok(warnings)
381 }
382
383 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
384 // For CLI usage, apply fixes from warnings
385 // LSP will use the warning-based fixes directly
386 let warnings = self.check(ctx)?;
387
388 // If there are no fixes, return content unchanged
389 if !warnings.iter().any(|w| w.fix.is_some()) {
390 return Ok(ctx.content.to_string());
391 }
392
393 // Apply warning-based fixes
394 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
395 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
396 }
397
398 fn as_any(&self) -> &dyn std::any::Any {
399 self
400 }
401
402 fn category(&self) -> RuleCategory {
403 RuleCategory::Whitespace
404 }
405
406 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
407 self.should_skip_with_config(ctx, &self.config)
408 }
409
410 fn default_config_section(&self) -> Option<(String, toml::Value)> {
411 let default_config = MD013Config::default();
412 let json_value = serde_json::to_value(&default_config).ok()?;
413 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
414
415 if let toml::Value::Table(table) = toml_value {
416 if !table.is_empty() {
417 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
418 } else {
419 None
420 }
421 } else {
422 None
423 }
424 }
425
426 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
427 let mut aliases = std::collections::HashMap::new();
428 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
429 Some(aliases)
430 }
431
432 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
433 where
434 Self: Sized,
435 {
436 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
437 // Use global line_length if rule-specific config still has default value
438 if rule_config.line_length.get() == 80 {
439 rule_config.line_length = config.global.line_length;
440 }
441 Box::new(Self::from_config_struct(rule_config))
442 }
443}
444
445impl MD013LineLength {
446 /// Generate paragraph-based fixes
447 fn generate_paragraph_fixes(
448 &self,
449 ctx: &crate::lint_context::LintContext,
450 config: &MD013Config,
451 lines: &[&str],
452 ) -> Vec<LintWarning> {
453 let mut warnings = Vec::new();
454 let line_index = LineIndex::new(ctx.content);
455
456 let mut i = 0;
457 while i < lines.len() {
458 let line_num = i + 1;
459
460 // Skip special structures (but NOT MkDocs containers - those get special handling)
461 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
462 info.in_code_block
463 || info.in_front_matter
464 || info.in_html_block
465 || info.in_html_comment
466 || info.in_esm_block
467 || info.in_jsx_expression
468 || info.in_mdx_comment
469 });
470
471 if should_skip_due_to_line_info
472 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
473 || lines[i].trim().starts_with('#')
474 || TableUtils::is_potential_table_row(lines[i])
475 || lines[i].trim().is_empty()
476 || is_horizontal_rule(lines[i].trim())
477 || is_template_directive_only(lines[i])
478 || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
479 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
480 {
481 i += 1;
482 continue;
483 }
484
485 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
486 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
487 let container_start = i;
488
489 // Detect the actual indent level from the first content line
490 // (supports nested admonitions with 8+ spaces)
491 let first_line = lines[i];
492 let base_indent_len = first_line.len() - first_line.trim_start().len();
493 let base_indent: String = " ".repeat(base_indent_len);
494
495 // Collect consecutive MkDocs container paragraph lines
496 let mut container_lines: Vec<&str> = Vec::new();
497 while i < lines.len() {
498 let current_line_num = i + 1;
499 let line_info = ctx.line_info(current_line_num);
500
501 // Stop if we leave the MkDocs container
502 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
503 break;
504 }
505
506 let line = lines[i];
507
508 // Stop at paragraph boundaries within the container
509 if line.trim().is_empty() {
510 break;
511 }
512
513 // Skip list items, code blocks, headings within containers
514 if is_list_item(line.trim())
515 || line.trim().starts_with("```")
516 || line.trim().starts_with("~~~")
517 || line.trim().starts_with('#')
518 {
519 break;
520 }
521
522 container_lines.push(line);
523 i += 1;
524 }
525
526 if container_lines.is_empty() {
527 // Must advance i to avoid infinite loop when we encounter
528 // non-paragraph content (code block, list, heading, empty line)
529 // at the start of an MkDocs container
530 i += 1;
531 continue;
532 }
533
534 // Strip the base indent from each line and join for reflow
535 let stripped_lines: Vec<&str> = container_lines
536 .iter()
537 .map(|line| {
538 if line.starts_with(&base_indent) {
539 &line[base_indent_len..]
540 } else {
541 line.trim_start()
542 }
543 })
544 .collect();
545 let paragraph_text = stripped_lines.join(" ");
546
547 // Check if reflow is needed
548 let needs_reflow = match config.reflow_mode {
549 ReflowMode::Normalize => container_lines.len() > 1,
550 ReflowMode::SentencePerLine => {
551 let sentences = split_into_sentences(¶graph_text);
552 sentences.len() > 1 || container_lines.len() > 1
553 }
554 ReflowMode::SemanticLineBreaks => {
555 let sentences = split_into_sentences(¶graph_text);
556 sentences.len() > 1
557 || container_lines.len() > 1
558 || container_lines
559 .iter()
560 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
561 }
562 ReflowMode::Default => container_lines
563 .iter()
564 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
565 };
566
567 if !needs_reflow {
568 continue;
569 }
570
571 // Calculate byte range for this container paragraph
572 let start_range = line_index.whole_line_range(container_start + 1);
573 let end_line = container_start + container_lines.len() - 1;
574 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
575 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
576 } else {
577 line_index.whole_line_range(end_line + 1)
578 };
579 let byte_range = start_range.start..end_range.end;
580
581 // Reflow with adjusted line length (accounting for the 4-space indent)
582 let reflow_line_length = if config.line_length.is_unlimited() {
583 usize::MAX
584 } else {
585 config.line_length.get().saturating_sub(base_indent_len).max(1)
586 };
587 let reflow_options = crate::utils::text_reflow::ReflowOptions {
588 line_length: reflow_line_length,
589 break_on_sentences: true,
590 preserve_breaks: false,
591 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
592 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
593 abbreviations: config.abbreviations_for_reflow(),
594 };
595 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
596
597 // Re-add the 4-space indent to each reflowed line
598 let reflowed_with_indent: Vec<String> =
599 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
600 let reflowed_text = reflowed_with_indent.join("\n");
601
602 // Preserve trailing newline
603 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
604 format!("{reflowed_text}\n")
605 } else {
606 reflowed_text
607 };
608
609 // Only generate a warning if the replacement is different
610 let original_text = &ctx.content[byte_range.clone()];
611 if original_text != replacement {
612 warnings.push(LintWarning {
613 rule_name: Some(self.name().to_string()),
614 message: format!(
615 "Line length {} exceeds {} characters (in MkDocs container)",
616 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
617 config.line_length.get()
618 ),
619 line: container_start + 1,
620 column: 1,
621 end_line: end_line + 1,
622 end_column: lines[end_line].len() + 1,
623 severity: Severity::Warning,
624 fix: Some(crate::rule::Fix {
625 range: byte_range,
626 replacement,
627 }),
628 });
629 }
630 continue;
631 }
632
633 // Helper function to detect semantic line markers
634 let is_semantic_line = |content: &str| -> bool {
635 let trimmed = content.trim_start();
636 let semantic_markers = [
637 "NOTE:",
638 "WARNING:",
639 "IMPORTANT:",
640 "CAUTION:",
641 "TIP:",
642 "DANGER:",
643 "HINT:",
644 "INFO:",
645 ];
646 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
647 };
648
649 // Helper function to detect fence markers (opening or closing)
650 let is_fence_marker = |content: &str| -> bool {
651 let trimmed = content.trim_start();
652 trimmed.starts_with("```") || trimmed.starts_with("~~~")
653 };
654
655 // Check if this is a list item - handle it specially
656 let trimmed = lines[i].trim();
657 if is_list_item(trimmed) {
658 // Collect the entire list item including continuation lines
659 let list_start = i;
660 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
661 let marker_len = marker.len();
662
663 // Track lines and their types (content, code block, fence, nested list)
664 #[derive(Clone)]
665 enum LineType {
666 Content(String),
667 CodeBlock(String, usize), // content and original indent
668 NestedListItem(String, usize), // full line content and original indent
669 SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
670 SnippetLine(String), // MkDocs Snippets delimiters (-8<-) that must stay on their own line
671 DivMarker(String), // Quarto/Pandoc div markers (::: opening or closing)
672 Empty,
673 }
674
675 let mut actual_indent: Option<usize> = None;
676 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
677 i += 1;
678
679 // Collect continuation lines using ctx.lines for metadata
680 while i < lines.len() {
681 let line_info = &ctx.lines[i];
682
683 // Use pre-computed is_blank from ctx
684 if line_info.is_blank {
685 // Empty line - check if next line is indented (part of list item)
686 if i + 1 < lines.len() {
687 let next_info = &ctx.lines[i + 1];
688
689 // Check if next line is indented enough to be continuation
690 if !next_info.is_blank && next_info.indent >= marker_len {
691 // This blank line is between paragraphs/blocks in the list item
692 list_item_lines.push(LineType::Empty);
693 i += 1;
694 continue;
695 }
696 }
697 // No indented line after blank, end of list item
698 break;
699 }
700
701 // Use pre-computed indent from ctx
702 let indent = line_info.indent;
703
704 // Valid continuation must be indented at least marker_len
705 if indent >= marker_len {
706 let trimmed = line_info.content(ctx.content).trim();
707
708 // Use pre-computed in_code_block from ctx
709 if line_info.in_code_block {
710 list_item_lines.push(LineType::CodeBlock(
711 line_info.content(ctx.content)[indent..].to_string(),
712 indent,
713 ));
714 i += 1;
715 continue;
716 }
717
718 // Check if this is a SIBLING list item (breaks parent)
719 // Nested lists are indented >= marker_len and are PART of the parent item
720 // Siblings are at indent < marker_len (at or before parent marker)
721 if is_list_item(trimmed) && indent < marker_len {
722 // This is a sibling item at same or higher level - end parent item
723 break;
724 }
725
726 // Check if this is a NESTED list item marker
727 // Nested lists should be processed separately UNLESS they're part of a
728 // multi-paragraph list item (indicated by a blank line before them OR
729 // it's a continuation of an already-started nested list)
730 if is_list_item(trimmed) && indent >= marker_len {
731 // Check if there was a blank line before this (multi-paragraph context)
732 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
733
734 // Check if we've already seen nested list content (another nested item)
735 let has_nested_content = list_item_lines.iter().any(|line| {
736 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
737 || matches!(line, LineType::NestedListItem(_, _))
738 });
739
740 if !has_blank_before && !has_nested_content {
741 // Single-paragraph context with no prior nested items: starts a new item
742 // End parent collection; nested list will be processed next
743 break;
744 }
745 // else: multi-paragraph context or continuation of nested list, keep collecting
746 // Mark this as a nested list item to preserve its structure
747 list_item_lines.push(LineType::NestedListItem(
748 line_info.content(ctx.content)[indent..].to_string(),
749 indent,
750 ));
751 i += 1;
752 continue;
753 }
754
755 // Normal continuation: marker_len to marker_len+3
756 if indent <= marker_len + 3 {
757 // Set actual_indent from first non-code continuation if not set
758 if actual_indent.is_none() {
759 actual_indent = Some(indent);
760 }
761
762 // Extract content (remove indentation and trailing whitespace)
763 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
764 // See: https://github.com/rvben/rumdl/issues/76
765 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
766
767 // Check if this is a div marker (::: opening or closing)
768 // These must be preserved on their own line, not merged into paragraphs
769 if line_info.is_div_marker {
770 list_item_lines.push(LineType::DivMarker(content));
771 }
772 // Check if this is a fence marker (opening or closing)
773 // These should be treated as code block lines, not paragraph content
774 else if is_fence_marker(&content) {
775 list_item_lines.push(LineType::CodeBlock(content, indent));
776 }
777 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
778 else if is_semantic_line(&content) {
779 list_item_lines.push(LineType::SemanticLine(content));
780 }
781 // Check if this is a snippet block delimiter (-8<- or --8<--)
782 // These must be preserved on their own lines for MkDocs Snippets extension
783 else if is_snippet_block_delimiter(&content) {
784 list_item_lines.push(LineType::SnippetLine(content));
785 } else {
786 list_item_lines.push(LineType::Content(content));
787 }
788 i += 1;
789 } else {
790 // indent >= marker_len + 4: indented code block
791 list_item_lines.push(LineType::CodeBlock(
792 line_info.content(ctx.content)[indent..].to_string(),
793 indent,
794 ));
795 i += 1;
796 }
797 } else {
798 // Not indented enough, end of list item
799 break;
800 }
801 }
802
803 // Use detected indent or fallback to marker length
804 let indent_size = actual_indent.unwrap_or(marker_len);
805 let expected_indent = " ".repeat(indent_size);
806
807 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
808 #[derive(Clone)]
809 enum Block {
810 Paragraph(Vec<String>),
811 Code {
812 lines: Vec<(String, usize)>, // (content, indent) pairs
813 has_preceding_blank: bool, // Whether there was a blank line before this block
814 },
815 NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
816 SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
817 SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
818 DivMarker(String), // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
819 Html {
820 lines: Vec<String>, // HTML content preserved exactly as-is
821 has_preceding_blank: bool, // Whether there was a blank line before this block
822 },
823 }
824
825 // HTML tag detection helpers
826 // Block-level HTML tags that should trigger HTML block detection
827 const BLOCK_LEVEL_TAGS: &[&str] = &[
828 "div",
829 "details",
830 "summary",
831 "section",
832 "article",
833 "header",
834 "footer",
835 "nav",
836 "aside",
837 "main",
838 "table",
839 "thead",
840 "tbody",
841 "tfoot",
842 "tr",
843 "td",
844 "th",
845 "ul",
846 "ol",
847 "li",
848 "dl",
849 "dt",
850 "dd",
851 "pre",
852 "blockquote",
853 "figure",
854 "figcaption",
855 "form",
856 "fieldset",
857 "legend",
858 "hr",
859 "p",
860 "h1",
861 "h2",
862 "h3",
863 "h4",
864 "h5",
865 "h6",
866 "style",
867 "script",
868 "noscript",
869 ];
870
871 fn is_block_html_opening_tag(line: &str) -> Option<String> {
872 let trimmed = line.trim();
873
874 // Check for HTML comments
875 if trimmed.starts_with("<!--") {
876 return Some("!--".to_string());
877 }
878
879 // Check for opening tags
880 if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
881 // Extract tag name from <tagname ...> or <tagname>
882 let after_bracket = &trimmed[1..];
883 if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
884 let tag_name = after_bracket[..end].to_lowercase();
885
886 // Only treat as block if it's a known block-level tag
887 if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
888 return Some(tag_name);
889 }
890 }
891 }
892 None
893 }
894
895 fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
896 let trimmed = line.trim();
897
898 // Special handling for HTML comments
899 if tag_name == "!--" {
900 return trimmed.ends_with("-->");
901 }
902
903 // Check for closing tags: </tagname> or </tagname ...>
904 trimmed.starts_with(&format!("</{tag_name}>"))
905 || trimmed.starts_with(&format!("</{tag_name} "))
906 || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
907 }
908
909 fn is_self_closing_tag(line: &str) -> bool {
910 let trimmed = line.trim();
911 trimmed.ends_with("/>")
912 }
913
914 let mut blocks: Vec<Block> = Vec::new();
915 let mut current_paragraph: Vec<String> = Vec::new();
916 let mut current_code_block: Vec<(String, usize)> = Vec::new();
917 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
918 let mut current_html_block: Vec<String> = Vec::new();
919 let mut html_tag_stack: Vec<String> = Vec::new();
920 let mut in_code = false;
921 let mut in_nested_list = false;
922 let mut in_html_block = false;
923 let mut had_preceding_blank = false; // Track if we just saw an empty line
924 let mut code_block_has_preceding_blank = false; // Track blank before current code block
925 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
926
927 for line in &list_item_lines {
928 match line {
929 LineType::Empty => {
930 if in_code {
931 current_code_block.push((String::new(), 0));
932 } else if in_nested_list {
933 current_nested_list.push((String::new(), 0));
934 } else if in_html_block {
935 // Allow blank lines inside HTML blocks
936 current_html_block.push(String::new());
937 } else if !current_paragraph.is_empty() {
938 blocks.push(Block::Paragraph(current_paragraph.clone()));
939 current_paragraph.clear();
940 }
941 // Mark that we saw a blank line
942 had_preceding_blank = true;
943 }
944 LineType::Content(content) => {
945 // Check if we're currently in an HTML block
946 if in_html_block {
947 current_html_block.push(content.clone());
948
949 // Check if this line closes any open HTML tags
950 if let Some(last_tag) = html_tag_stack.last() {
951 if is_html_closing_tag(content, last_tag) {
952 html_tag_stack.pop();
953
954 // If stack is empty, HTML block is complete
955 if html_tag_stack.is_empty() {
956 blocks.push(Block::Html {
957 lines: current_html_block.clone(),
958 has_preceding_blank: html_block_has_preceding_blank,
959 });
960 current_html_block.clear();
961 in_html_block = false;
962 }
963 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
964 // Nested opening tag within HTML block
965 if !is_self_closing_tag(content) {
966 html_tag_stack.push(new_tag);
967 }
968 }
969 }
970 had_preceding_blank = false;
971 } else {
972 // Not in HTML block - check if this line starts one
973 if let Some(tag_name) = is_block_html_opening_tag(content) {
974 // Flush current paragraph before starting HTML block
975 if in_code {
976 blocks.push(Block::Code {
977 lines: current_code_block.clone(),
978 has_preceding_blank: code_block_has_preceding_blank,
979 });
980 current_code_block.clear();
981 in_code = false;
982 } else if in_nested_list {
983 blocks.push(Block::NestedList(current_nested_list.clone()));
984 current_nested_list.clear();
985 in_nested_list = false;
986 } else if !current_paragraph.is_empty() {
987 blocks.push(Block::Paragraph(current_paragraph.clone()));
988 current_paragraph.clear();
989 }
990
991 // Start new HTML block
992 in_html_block = true;
993 html_block_has_preceding_blank = had_preceding_blank;
994 current_html_block.push(content.clone());
995
996 // Check if it's self-closing or needs a closing tag
997 if is_self_closing_tag(content) {
998 // Self-closing tag - complete the HTML block immediately
999 blocks.push(Block::Html {
1000 lines: current_html_block.clone(),
1001 has_preceding_blank: html_block_has_preceding_blank,
1002 });
1003 current_html_block.clear();
1004 in_html_block = false;
1005 } else {
1006 // Regular opening tag - push to stack
1007 html_tag_stack.push(tag_name);
1008 }
1009 } else {
1010 // Regular content line - add to paragraph
1011 if in_code {
1012 // Switching from code to content
1013 blocks.push(Block::Code {
1014 lines: current_code_block.clone(),
1015 has_preceding_blank: code_block_has_preceding_blank,
1016 });
1017 current_code_block.clear();
1018 in_code = false;
1019 } else if in_nested_list {
1020 // Switching from nested list to content
1021 blocks.push(Block::NestedList(current_nested_list.clone()));
1022 current_nested_list.clear();
1023 in_nested_list = false;
1024 }
1025 current_paragraph.push(content.clone());
1026 }
1027 had_preceding_blank = false; // Reset after content
1028 }
1029 }
1030 LineType::CodeBlock(content, indent) => {
1031 if in_nested_list {
1032 // Switching from nested list to code
1033 blocks.push(Block::NestedList(current_nested_list.clone()));
1034 current_nested_list.clear();
1035 in_nested_list = false;
1036 } else if in_html_block {
1037 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1038 blocks.push(Block::Html {
1039 lines: current_html_block.clone(),
1040 has_preceding_blank: html_block_has_preceding_blank,
1041 });
1042 current_html_block.clear();
1043 html_tag_stack.clear();
1044 in_html_block = false;
1045 }
1046 if !in_code {
1047 // Switching from content to code
1048 if !current_paragraph.is_empty() {
1049 blocks.push(Block::Paragraph(current_paragraph.clone()));
1050 current_paragraph.clear();
1051 }
1052 in_code = true;
1053 // Record whether there was a blank line before this code block
1054 code_block_has_preceding_blank = had_preceding_blank;
1055 }
1056 current_code_block.push((content.clone(), *indent));
1057 had_preceding_blank = false; // Reset after code
1058 }
1059 LineType::NestedListItem(content, indent) => {
1060 if in_code {
1061 // Switching from code to nested list
1062 blocks.push(Block::Code {
1063 lines: current_code_block.clone(),
1064 has_preceding_blank: code_block_has_preceding_blank,
1065 });
1066 current_code_block.clear();
1067 in_code = false;
1068 } else if in_html_block {
1069 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1070 blocks.push(Block::Html {
1071 lines: current_html_block.clone(),
1072 has_preceding_blank: html_block_has_preceding_blank,
1073 });
1074 current_html_block.clear();
1075 html_tag_stack.clear();
1076 in_html_block = false;
1077 }
1078 if !in_nested_list {
1079 // Switching from content to nested list
1080 if !current_paragraph.is_empty() {
1081 blocks.push(Block::Paragraph(current_paragraph.clone()));
1082 current_paragraph.clear();
1083 }
1084 in_nested_list = true;
1085 }
1086 current_nested_list.push((content.clone(), *indent));
1087 had_preceding_blank = false; // Reset after nested list
1088 }
1089 LineType::SemanticLine(content) => {
1090 // Semantic lines are standalone - flush any current block and add as separate block
1091 if in_code {
1092 blocks.push(Block::Code {
1093 lines: current_code_block.clone(),
1094 has_preceding_blank: code_block_has_preceding_blank,
1095 });
1096 current_code_block.clear();
1097 in_code = false;
1098 } else if in_nested_list {
1099 blocks.push(Block::NestedList(current_nested_list.clone()));
1100 current_nested_list.clear();
1101 in_nested_list = false;
1102 } else if in_html_block {
1103 blocks.push(Block::Html {
1104 lines: current_html_block.clone(),
1105 has_preceding_blank: html_block_has_preceding_blank,
1106 });
1107 current_html_block.clear();
1108 html_tag_stack.clear();
1109 in_html_block = false;
1110 } else if !current_paragraph.is_empty() {
1111 blocks.push(Block::Paragraph(current_paragraph.clone()));
1112 current_paragraph.clear();
1113 }
1114 // Add semantic line as its own block
1115 blocks.push(Block::SemanticLine(content.clone()));
1116 had_preceding_blank = false; // Reset after semantic line
1117 }
1118 LineType::SnippetLine(content) => {
1119 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1120 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1121 if in_code {
1122 blocks.push(Block::Code {
1123 lines: current_code_block.clone(),
1124 has_preceding_blank: code_block_has_preceding_blank,
1125 });
1126 current_code_block.clear();
1127 in_code = false;
1128 } else if in_nested_list {
1129 blocks.push(Block::NestedList(current_nested_list.clone()));
1130 current_nested_list.clear();
1131 in_nested_list = false;
1132 } else if in_html_block {
1133 blocks.push(Block::Html {
1134 lines: current_html_block.clone(),
1135 has_preceding_blank: html_block_has_preceding_blank,
1136 });
1137 current_html_block.clear();
1138 html_tag_stack.clear();
1139 in_html_block = false;
1140 } else if !current_paragraph.is_empty() {
1141 blocks.push(Block::Paragraph(current_paragraph.clone()));
1142 current_paragraph.clear();
1143 }
1144 // Add snippet line as its own block
1145 blocks.push(Block::SnippetLine(content.clone()));
1146 had_preceding_blank = false;
1147 }
1148 LineType::DivMarker(content) => {
1149 // Div markers (::: opening or closing) are standalone structural delimiters
1150 // Flush any current block and add as separate block
1151 if in_code {
1152 blocks.push(Block::Code {
1153 lines: current_code_block.clone(),
1154 has_preceding_blank: code_block_has_preceding_blank,
1155 });
1156 current_code_block.clear();
1157 in_code = false;
1158 } else if in_nested_list {
1159 blocks.push(Block::NestedList(current_nested_list.clone()));
1160 current_nested_list.clear();
1161 in_nested_list = false;
1162 } else if in_html_block {
1163 blocks.push(Block::Html {
1164 lines: current_html_block.clone(),
1165 has_preceding_blank: html_block_has_preceding_blank,
1166 });
1167 current_html_block.clear();
1168 html_tag_stack.clear();
1169 in_html_block = false;
1170 } else if !current_paragraph.is_empty() {
1171 blocks.push(Block::Paragraph(current_paragraph.clone()));
1172 current_paragraph.clear();
1173 }
1174 blocks.push(Block::DivMarker(content.clone()));
1175 had_preceding_blank = false;
1176 }
1177 }
1178 }
1179
1180 // Push remaining block
1181 if in_code && !current_code_block.is_empty() {
1182 blocks.push(Block::Code {
1183 lines: current_code_block,
1184 has_preceding_blank: code_block_has_preceding_blank,
1185 });
1186 } else if in_nested_list && !current_nested_list.is_empty() {
1187 blocks.push(Block::NestedList(current_nested_list));
1188 } else if in_html_block && !current_html_block.is_empty() {
1189 // If we still have an unclosed HTML block, push it anyway
1190 // (malformed HTML - missing closing tag)
1191 blocks.push(Block::Html {
1192 lines: current_html_block,
1193 has_preceding_blank: html_block_has_preceding_blank,
1194 });
1195 } else if !current_paragraph.is_empty() {
1196 blocks.push(Block::Paragraph(current_paragraph));
1197 }
1198
1199 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1200 let content_lines: Vec<String> = list_item_lines
1201 .iter()
1202 .filter_map(|line| {
1203 if let LineType::Content(s) = line {
1204 Some(s.clone())
1205 } else {
1206 None
1207 }
1208 })
1209 .collect();
1210
1211 // Check if we need to reflow this list item
1212 // We check the combined content to see if it exceeds length limits
1213 let combined_content = content_lines.join(" ").trim().to_string();
1214 let full_line = format!("{marker}{combined_content}");
1215
1216 // Helper to check if we should reflow in normalize mode
1217 let should_normalize = || {
1218 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1219 // DO normalize if it has plain text content that spans multiple lines
1220 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1221 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1222 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1223 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1224 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1225 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1226
1227 // If we have structural blocks but no paragraphs, don't normalize
1228 if (has_nested_lists
1229 || has_code_blocks
1230 || has_semantic_lines
1231 || has_snippet_lines
1232 || has_div_markers)
1233 && !has_paragraphs
1234 {
1235 return false;
1236 }
1237
1238 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1239 if has_paragraphs {
1240 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1241 if paragraph_count > 1 {
1242 // Multiple paragraph blocks should be normalized
1243 return true;
1244 }
1245
1246 // Single paragraph block: normalize if it has multiple content lines
1247 if content_lines.len() > 1 {
1248 return true;
1249 }
1250 }
1251
1252 false
1253 };
1254
1255 let needs_reflow = match config.reflow_mode {
1256 ReflowMode::Normalize => {
1257 // Only reflow if:
1258 // 1. The combined line would exceed the limit, OR
1259 // 2. The list item should be normalized (has multi-line plain text)
1260 let combined_length = self.calculate_effective_length(&full_line);
1261 if combined_length > config.line_length.get() {
1262 true
1263 } else {
1264 should_normalize()
1265 }
1266 }
1267 ReflowMode::SentencePerLine => {
1268 // Check if list item has multiple sentences
1269 let sentences = split_into_sentences(&combined_content);
1270 sentences.len() > 1
1271 }
1272 ReflowMode::SemanticLineBreaks => {
1273 let sentences = split_into_sentences(&combined_content);
1274 sentences.len() > 1
1275 || (list_start..i).any(|line_idx| {
1276 self.calculate_effective_length(lines[line_idx]) > config.line_length.get()
1277 })
1278 }
1279 ReflowMode::Default => {
1280 // In default mode, only reflow if any individual line exceeds limit
1281 (list_start..i)
1282 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1283 }
1284 };
1285
1286 if needs_reflow {
1287 let start_range = line_index.whole_line_range(list_start + 1);
1288 let end_line = i - 1;
1289 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1290 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1291 } else {
1292 line_index.whole_line_range(end_line + 1)
1293 };
1294 let byte_range = start_range.start..end_range.end;
1295
1296 // Reflow each block (paragraphs only, preserve code blocks)
1297 // When line_length = 0 (no limit), use a very large value for reflow
1298 let reflow_line_length = if config.line_length.is_unlimited() {
1299 usize::MAX
1300 } else {
1301 config.line_length.get().saturating_sub(indent_size).max(1)
1302 };
1303 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1304 line_length: reflow_line_length,
1305 break_on_sentences: true,
1306 preserve_breaks: false,
1307 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1308 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1309 abbreviations: config.abbreviations_for_reflow(),
1310 };
1311
1312 let mut result: Vec<String> = Vec::new();
1313 let mut is_first_block = true;
1314
1315 for (block_idx, block) in blocks.iter().enumerate() {
1316 match block {
1317 Block::Paragraph(para_lines) => {
1318 // Split the paragraph into segments at hard break boundaries
1319 // Each segment can be reflowed independently
1320 let segments = split_into_segments(para_lines);
1321
1322 for (segment_idx, segment) in segments.iter().enumerate() {
1323 // Check if this segment ends with a hard break and what type
1324 let hard_break_type = segment.last().and_then(|line| {
1325 let line = line.strip_suffix('\r').unwrap_or(line);
1326 if line.ends_with('\\') {
1327 Some("\\")
1328 } else if line.ends_with(" ") {
1329 Some(" ")
1330 } else {
1331 None
1332 }
1333 });
1334
1335 // Join and reflow the segment (removing the hard break marker for processing)
1336 let segment_for_reflow: Vec<String> = segment
1337 .iter()
1338 .map(|line| {
1339 // Strip hard break marker (2 spaces or backslash) for reflow processing
1340 if line.ends_with('\\') {
1341 line[..line.len() - 1].trim_end().to_string()
1342 } else if line.ends_with(" ") {
1343 line[..line.len() - 2].trim_end().to_string()
1344 } else {
1345 line.clone()
1346 }
1347 })
1348 .collect();
1349
1350 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1351 if !segment_text.is_empty() {
1352 let reflowed =
1353 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1354
1355 if is_first_block && segment_idx == 0 {
1356 // First segment of first block starts with marker
1357 result.push(format!("{marker}{}", reflowed[0]));
1358 for line in reflowed.iter().skip(1) {
1359 result.push(format!("{expected_indent}{line}"));
1360 }
1361 is_first_block = false;
1362 } else {
1363 // Subsequent segments
1364 for line in reflowed {
1365 result.push(format!("{expected_indent}{line}"));
1366 }
1367 }
1368
1369 // If this segment had a hard break, add it back to the last line
1370 // Preserve the original hard break format (backslash or two spaces)
1371 if let Some(break_marker) = hard_break_type
1372 && let Some(last_line) = result.last_mut()
1373 {
1374 last_line.push_str(break_marker);
1375 }
1376 }
1377 }
1378
1379 // Add blank line after paragraph block if there's a next block
1380 // BUT: check if next block is a code block that doesn't want a preceding blank
1381 // Also don't add blank lines before snippet lines (they should stay tight)
1382 if block_idx < blocks.len() - 1 {
1383 let next_block = &blocks[block_idx + 1];
1384 let should_add_blank = match next_block {
1385 Block::Code {
1386 has_preceding_blank, ..
1387 } => *has_preceding_blank,
1388 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1389 _ => true, // For all other blocks, add blank line
1390 };
1391 if should_add_blank {
1392 result.push(String::new());
1393 }
1394 }
1395 }
1396 Block::Code {
1397 lines: code_lines,
1398 has_preceding_blank: _,
1399 } => {
1400 // Preserve code blocks as-is with original indentation
1401 // NOTE: Blank line before code block is handled by the previous block
1402 // (see paragraph block's logic above)
1403
1404 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1405 if is_first_block && idx == 0 {
1406 // First line of first block gets marker
1407 result.push(format!(
1408 "{marker}{}",
1409 " ".repeat(orig_indent - marker_len) + content
1410 ));
1411 is_first_block = false;
1412 } else if content.is_empty() {
1413 result.push(String::new());
1414 } else {
1415 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1416 }
1417 }
1418 }
1419 Block::NestedList(nested_items) => {
1420 // Preserve nested list items as-is with original indentation
1421 if !is_first_block {
1422 result.push(String::new());
1423 }
1424
1425 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1426 if is_first_block && idx == 0 {
1427 // First line of first block gets marker
1428 result.push(format!(
1429 "{marker}{}",
1430 " ".repeat(orig_indent - marker_len) + content
1431 ));
1432 is_first_block = false;
1433 } else if content.is_empty() {
1434 result.push(String::new());
1435 } else {
1436 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1437 }
1438 }
1439
1440 // Add blank line after nested list if there's a next block
1441 // Check if next block is a code block that doesn't want a preceding blank
1442 if block_idx < blocks.len() - 1 {
1443 let next_block = &blocks[block_idx + 1];
1444 let should_add_blank = match next_block {
1445 Block::Code {
1446 has_preceding_blank, ..
1447 } => *has_preceding_blank,
1448 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1449 _ => true, // For all other blocks, add blank line
1450 };
1451 if should_add_blank {
1452 result.push(String::new());
1453 }
1454 }
1455 }
1456 Block::SemanticLine(content) => {
1457 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1458 // Add blank line before if not first block
1459 if !is_first_block {
1460 result.push(String::new());
1461 }
1462
1463 if is_first_block {
1464 // First block starts with marker
1465 result.push(format!("{marker}{content}"));
1466 is_first_block = false;
1467 } else {
1468 // Subsequent blocks use expected indent
1469 result.push(format!("{expected_indent}{content}"));
1470 }
1471
1472 // Add blank line after semantic line if there's a next block
1473 // Check if next block is a code block that doesn't want a preceding blank
1474 if block_idx < blocks.len() - 1 {
1475 let next_block = &blocks[block_idx + 1];
1476 let should_add_blank = match next_block {
1477 Block::Code {
1478 has_preceding_blank, ..
1479 } => *has_preceding_blank,
1480 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1481 _ => true, // For all other blocks, add blank line
1482 };
1483 if should_add_blank {
1484 result.push(String::new());
1485 }
1486 }
1487 }
1488 Block::SnippetLine(content) => {
1489 // Preserve snippet delimiters (-8<-) as-is on their own line
1490 // Unlike semantic lines, snippet lines don't add extra blank lines
1491 if is_first_block {
1492 // First block starts with marker
1493 result.push(format!("{marker}{content}"));
1494 is_first_block = false;
1495 } else {
1496 // Subsequent blocks use expected indent
1497 result.push(format!("{expected_indent}{content}"));
1498 }
1499 // No blank lines added before or after snippet delimiters
1500 }
1501 Block::DivMarker(content) => {
1502 // Preserve div markers (::: opening or closing) as-is on their own line
1503 if is_first_block {
1504 result.push(format!("{marker}{content}"));
1505 is_first_block = false;
1506 } else {
1507 result.push(format!("{expected_indent}{content}"));
1508 }
1509 }
1510 Block::Html {
1511 lines: html_lines,
1512 has_preceding_blank: _,
1513 } => {
1514 // Preserve HTML blocks exactly as-is with original indentation
1515 // NOTE: Blank line before HTML block is handled by the previous block
1516
1517 for (idx, line) in html_lines.iter().enumerate() {
1518 if is_first_block && idx == 0 {
1519 // First line of first block gets marker
1520 result.push(format!("{marker}{line}"));
1521 is_first_block = false;
1522 } else if line.is_empty() {
1523 // Preserve blank lines inside HTML blocks
1524 result.push(String::new());
1525 } else {
1526 // Preserve lines with their original content (already includes indentation)
1527 result.push(format!("{expected_indent}{line}"));
1528 }
1529 }
1530
1531 // Add blank line after HTML block if there's a next block
1532 if block_idx < blocks.len() - 1 {
1533 let next_block = &blocks[block_idx + 1];
1534 let should_add_blank = match next_block {
1535 Block::Code {
1536 has_preceding_blank, ..
1537 } => *has_preceding_blank,
1538 Block::Html {
1539 has_preceding_blank, ..
1540 } => *has_preceding_blank,
1541 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1542 _ => true, // For all other blocks, add blank line
1543 };
1544 if should_add_blank {
1545 result.push(String::new());
1546 }
1547 }
1548 }
1549 }
1550 }
1551
1552 let reflowed_text = result.join("\n");
1553
1554 // Preserve trailing newline
1555 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1556 format!("{reflowed_text}\n")
1557 } else {
1558 reflowed_text
1559 };
1560
1561 // Get the original text to compare
1562 let original_text = &ctx.content[byte_range.clone()];
1563
1564 // Only generate a warning if the replacement is different from the original
1565 if original_text != replacement {
1566 // Generate an appropriate message based on why reflow is needed
1567 let message = match config.reflow_mode {
1568 ReflowMode::SentencePerLine => {
1569 let num_sentences = split_into_sentences(&combined_content).len();
1570 let num_lines = content_lines.len();
1571 if num_lines == 1 {
1572 // Single line with multiple sentences
1573 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1574 } else {
1575 // Multiple lines - could be split sentences or mixed
1576 format!(
1577 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1578 )
1579 }
1580 }
1581 ReflowMode::SemanticLineBreaks => {
1582 let num_sentences = split_into_sentences(&combined_content).len();
1583 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
1584 }
1585 ReflowMode::Normalize => {
1586 let combined_length = self.calculate_effective_length(&full_line);
1587 if combined_length > config.line_length.get() {
1588 format!(
1589 "Line length {} exceeds {} characters",
1590 combined_length,
1591 config.line_length.get()
1592 )
1593 } else {
1594 "Multi-line content can be normalized".to_string()
1595 }
1596 }
1597 ReflowMode::Default => {
1598 let combined_length = self.calculate_effective_length(&full_line);
1599 format!(
1600 "Line length {} exceeds {} characters",
1601 combined_length,
1602 config.line_length.get()
1603 )
1604 }
1605 };
1606
1607 warnings.push(LintWarning {
1608 rule_name: Some(self.name().to_string()),
1609 message,
1610 line: list_start + 1,
1611 column: 1,
1612 end_line: end_line + 1,
1613 end_column: lines[end_line].len() + 1,
1614 severity: Severity::Warning,
1615 fix: Some(crate::rule::Fix {
1616 range: byte_range,
1617 replacement,
1618 }),
1619 });
1620 }
1621 }
1622 continue;
1623 }
1624
1625 // Found start of a paragraph - collect all lines in it
1626 let paragraph_start = i;
1627 let mut paragraph_lines = vec![lines[i]];
1628 i += 1;
1629
1630 while i < lines.len() {
1631 let next_line = lines[i];
1632 let next_line_num = i + 1;
1633 let next_trimmed = next_line.trim();
1634
1635 // Stop at paragraph boundaries
1636 if next_trimmed.is_empty()
1637 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1638 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1639 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1640 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1641 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1642 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1643 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1644 || ctx
1645 .line_info(next_line_num)
1646 .is_some_and(|info| info.in_mkdocs_container())
1647 || (next_line_num > 0
1648 && next_line_num <= ctx.lines.len()
1649 && ctx.lines[next_line_num - 1].blockquote.is_some())
1650 || next_trimmed.starts_with('#')
1651 || TableUtils::is_potential_table_row(next_line)
1652 || is_list_item(next_trimmed)
1653 || is_horizontal_rule(next_trimmed)
1654 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1655 || is_template_directive_only(next_line)
1656 || is_standalone_attr_list(next_line)
1657 || is_snippet_block_delimiter(next_line)
1658 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
1659 {
1660 break;
1661 }
1662
1663 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1664 if i > 0 && has_hard_break(lines[i - 1]) {
1665 // Don't include lines after hard breaks in the same paragraph
1666 break;
1667 }
1668
1669 paragraph_lines.push(next_line);
1670 i += 1;
1671 }
1672
1673 // Combine paragraph lines into a single string for processing
1674 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1675 let paragraph_text = paragraph_lines.join(" ");
1676
1677 // Skip reflowing if this paragraph contains definition list items
1678 // Definition lists are multi-line structures that should not be joined
1679 let contains_definition_list = paragraph_lines
1680 .iter()
1681 .any(|line| crate::utils::is_definition_list_item(line));
1682
1683 if contains_definition_list {
1684 // Don't reflow definition lists - skip this paragraph
1685 i = paragraph_start + paragraph_lines.len();
1686 continue;
1687 }
1688
1689 // Skip reflowing if this paragraph contains MkDocs Snippets markers
1690 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1691 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1692
1693 if contains_snippets {
1694 // Don't reflow Snippets blocks - skip this paragraph
1695 i = paragraph_start + paragraph_lines.len();
1696 continue;
1697 }
1698
1699 // Check if this paragraph needs reflowing
1700 let needs_reflow = match config.reflow_mode {
1701 ReflowMode::Normalize => {
1702 // In normalize mode, reflow multi-line paragraphs
1703 paragraph_lines.len() > 1
1704 }
1705 ReflowMode::SentencePerLine => {
1706 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1707 // Note: we check the joined text because sentences can span multiple lines
1708 let sentences = split_into_sentences(¶graph_text);
1709
1710 // Always reflow if multiple sentences on one line
1711 if sentences.len() > 1 {
1712 true
1713 } else if paragraph_lines.len() > 1 {
1714 // For single-sentence paragraphs spanning multiple lines:
1715 // Reflow if they COULD fit on one line (respecting line-length constraint)
1716 if config.line_length.is_unlimited() {
1717 // No line-length constraint - always join single sentences
1718 true
1719 } else {
1720 // Only join if it fits within line-length
1721 let effective_length = self.calculate_effective_length(¶graph_text);
1722 effective_length <= config.line_length.get()
1723 }
1724 } else {
1725 false
1726 }
1727 }
1728 ReflowMode::SemanticLineBreaks => {
1729 let sentences = split_into_sentences(¶graph_text);
1730 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
1731 sentences.len() > 1
1732 || paragraph_lines.len() > 1
1733 || paragraph_lines
1734 .iter()
1735 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1736 }
1737 ReflowMode::Default => {
1738 // In default mode, only reflow if lines exceed limit
1739 paragraph_lines
1740 .iter()
1741 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1742 }
1743 };
1744
1745 if needs_reflow {
1746 // Calculate byte range for this paragraph
1747 // Use whole_line_range for each line and combine
1748 let start_range = line_index.whole_line_range(paragraph_start + 1);
1749 let end_line = paragraph_start + paragraph_lines.len() - 1;
1750
1751 // For the last line, we want to preserve any trailing newline
1752 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1753 // Last line without trailing newline - use line_text_range
1754 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1755 } else {
1756 // Not the last line or has trailing newline - use whole_line_range
1757 line_index.whole_line_range(end_line + 1)
1758 };
1759
1760 let byte_range = start_range.start..end_range.end;
1761
1762 // Check if the paragraph ends with a hard break and what type
1763 let hard_break_type = paragraph_lines.last().and_then(|line| {
1764 let line = line.strip_suffix('\r').unwrap_or(line);
1765 if line.ends_with('\\') {
1766 Some("\\")
1767 } else if line.ends_with(" ") {
1768 Some(" ")
1769 } else {
1770 None
1771 }
1772 });
1773
1774 // Reflow the paragraph
1775 // When line_length = 0 (no limit), use a very large value for reflow
1776 let reflow_line_length = if config.line_length.is_unlimited() {
1777 usize::MAX
1778 } else {
1779 config.line_length.get()
1780 };
1781 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1782 line_length: reflow_line_length,
1783 break_on_sentences: true,
1784 preserve_breaks: false,
1785 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1786 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1787 abbreviations: config.abbreviations_for_reflow(),
1788 };
1789 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1790
1791 // If the original paragraph ended with a hard break, preserve it
1792 // Preserve the original hard break format (backslash or two spaces)
1793 if let Some(break_marker) = hard_break_type
1794 && !reflowed.is_empty()
1795 {
1796 let last_idx = reflowed.len() - 1;
1797 if !has_hard_break(&reflowed[last_idx]) {
1798 reflowed[last_idx].push_str(break_marker);
1799 }
1800 }
1801
1802 let reflowed_text = reflowed.join("\n");
1803
1804 // Preserve trailing newline if the original paragraph had one
1805 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1806 format!("{reflowed_text}\n")
1807 } else {
1808 reflowed_text
1809 };
1810
1811 // Get the original text to compare
1812 let original_text = &ctx.content[byte_range.clone()];
1813
1814 // Only generate a warning if the replacement is different from the original
1815 if original_text != replacement {
1816 // Create warning with actual fix
1817 // In default mode, report the specific line that violates
1818 // In normalize mode, report the whole paragraph
1819 // In sentence-per-line mode, report the entire paragraph
1820 let (warning_line, warning_end_line) = match config.reflow_mode {
1821 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1822 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
1823 // Highlight the entire paragraph that needs reformatting
1824 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1825 }
1826 ReflowMode::Default => {
1827 // Find the first line that exceeds the limit
1828 let mut violating_line = paragraph_start;
1829 for (idx, line) in paragraph_lines.iter().enumerate() {
1830 if self.calculate_effective_length(line) > config.line_length.get() {
1831 violating_line = paragraph_start + idx;
1832 break;
1833 }
1834 }
1835 (violating_line + 1, violating_line + 1)
1836 }
1837 };
1838
1839 warnings.push(LintWarning {
1840 rule_name: Some(self.name().to_string()),
1841 message: match config.reflow_mode {
1842 ReflowMode::Normalize => format!(
1843 "Paragraph could be normalized to use line length of {} characters",
1844 config.line_length.get()
1845 ),
1846 ReflowMode::SentencePerLine => {
1847 let num_sentences = split_into_sentences(¶graph_text).len();
1848 if paragraph_lines.len() == 1 {
1849 // Single line with multiple sentences
1850 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1851 } else {
1852 let num_lines = paragraph_lines.len();
1853 // Multiple lines - could be split sentences or mixed
1854 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1855 }
1856 },
1857 ReflowMode::SemanticLineBreaks => {
1858 let num_sentences = split_into_sentences(¶graph_text).len();
1859 format!(
1860 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
1861 )
1862 },
1863 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1864 },
1865 line: warning_line,
1866 column: 1,
1867 end_line: warning_end_line,
1868 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1869 severity: Severity::Warning,
1870 fix: Some(crate::rule::Fix {
1871 range: byte_range,
1872 replacement,
1873 }),
1874 });
1875 }
1876 }
1877 }
1878
1879 warnings
1880 }
1881
1882 /// Calculate string length based on the configured length mode
1883 fn calculate_string_length(&self, s: &str) -> usize {
1884 match self.config.length_mode {
1885 LengthMode::Chars => s.chars().count(),
1886 LengthMode::Visual => s.width(),
1887 LengthMode::Bytes => s.len(),
1888 }
1889 }
1890
1891 /// Calculate effective line length
1892 ///
1893 /// Returns the actual display length of the line using the configured length mode.
1894 fn calculate_effective_length(&self, line: &str) -> usize {
1895 self.calculate_string_length(line)
1896 }
1897}