rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
7use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
8use crate::utils::range_utils::LineIndex;
9use crate::utils::range_utils::calculate_excess_range;
10use crate::utils::regex_cache::{
11 IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
12};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::split_into_sentences;
15use toml;
16
17mod helpers;
18pub mod md013_config;
19use helpers::{
20 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
21 split_into_segments, trim_preserving_hard_break,
22};
23pub use md013_config::MD013Config;
24use md013_config::{LengthMode, ReflowMode};
25
26#[cfg(test)]
27mod tests;
28use unicode_width::UnicodeWidthStr;
29
30#[derive(Clone, Default)]
31pub struct MD013LineLength {
32 pub(crate) config: MD013Config,
33}
34
35impl MD013LineLength {
36 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
37 Self {
38 config: MD013Config {
39 line_length: crate::types::LineLength::new(line_length),
40 code_blocks,
41 tables,
42 headings,
43 paragraphs: true, // Default to true for backwards compatibility
44 strict,
45 reflow: false,
46 reflow_mode: ReflowMode::default(),
47 length_mode: LengthMode::default(),
48 abbreviations: Vec::new(),
49 },
50 }
51 }
52
53 pub fn from_config_struct(config: MD013Config) -> Self {
54 Self { config }
55 }
56
57 fn should_ignore_line(
58 &self,
59 line: &str,
60 _lines: &[&str],
61 current_line: usize,
62 ctx: &crate::lint_context::LintContext,
63 ) -> bool {
64 if self.config.strict {
65 return false;
66 }
67
68 // Quick check for common patterns before expensive regex
69 let trimmed = line.trim();
70
71 // Only skip if the entire line is a URL (quick check first)
72 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
73 return true;
74 }
75
76 // Only skip if the entire line is an image reference (quick check first)
77 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
78 return true;
79 }
80
81 // Only skip if the entire line is a link reference (quick check first)
82 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
83 return true;
84 }
85
86 // Code blocks with long strings (only check if in code block)
87 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
88 && !trimmed.is_empty()
89 && !line.contains(' ')
90 && !line.contains('\t')
91 {
92 return true;
93 }
94
95 false
96 }
97
98 /// Check if rule should skip based on provided config (used for inline config support)
99 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
100 // Skip if content is empty
101 if ctx.content.is_empty() {
102 return true;
103 }
104
105 // For sentence-per-line or normalize mode, never skip based on line length
106 if config.reflow
107 && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
108 {
109 return false;
110 }
111
112 // Quick check: if total content is shorter than line limit, definitely skip
113 if ctx.content.len() <= config.line_length.get() {
114 return true;
115 }
116
117 // Skip if no line exceeds the limit
118 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
119 }
120}
121
122impl Rule for MD013LineLength {
123 fn name(&self) -> &'static str {
124 "MD013"
125 }
126
127 fn description(&self) -> &'static str {
128 "Line length should not be excessive"
129 }
130
131 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
132 let content = ctx.content;
133
134 // Parse inline configuration FIRST so we can use effective config for should_skip
135 let inline_config = crate::inline_config::InlineConfig::from_content(content);
136 let config_override = inline_config.get_rule_config("MD013");
137
138 // Apply configuration override if present
139 let effective_config = if let Some(json_config) = config_override {
140 if let Some(obj) = json_config.as_object() {
141 let mut config = self.config.clone();
142 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
143 config.line_length = crate::types::LineLength::new(line_length as usize);
144 }
145 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
146 config.code_blocks = code_blocks;
147 }
148 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
149 config.tables = tables;
150 }
151 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
152 config.headings = headings;
153 }
154 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
155 config.strict = strict;
156 }
157 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
158 config.reflow = reflow;
159 }
160 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
161 config.reflow_mode = match reflow_mode {
162 "default" => ReflowMode::Default,
163 "normalize" => ReflowMode::Normalize,
164 "sentence-per-line" => ReflowMode::SentencePerLine,
165 _ => ReflowMode::default(),
166 };
167 }
168 config
169 } else {
170 self.config.clone()
171 }
172 } else {
173 self.config.clone()
174 };
175
176 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
177 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
178 if self.should_skip_with_config(ctx, &effective_config)
179 && !(effective_config.reflow
180 && (effective_config.reflow_mode == ReflowMode::Normalize
181 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
182 {
183 return Ok(Vec::new());
184 }
185
186 // Direct implementation without DocumentStructure
187 let mut warnings = Vec::new();
188
189 // Special handling: line_length = 0 means "no line length limit"
190 // Skip all line length checks, but still allow reflow if enabled
191 let skip_length_checks = effective_config.line_length.is_unlimited();
192
193 // Pre-filter lines that could be problematic to avoid processing all lines
194 let mut candidate_lines = Vec::new();
195 if !skip_length_checks {
196 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
197 // Skip front matter - it should never be linted
198 if line_info.in_front_matter {
199 continue;
200 }
201
202 // Quick length check first
203 if line_info.byte_len > effective_config.line_length.get() {
204 candidate_lines.push(line_idx);
205 }
206 }
207 }
208
209 // If no candidate lines and not in normalize or sentence-per-line mode, early return
210 if candidate_lines.is_empty()
211 && !(effective_config.reflow
212 && (effective_config.reflow_mode == ReflowMode::Normalize
213 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
214 {
215 return Ok(warnings);
216 }
217
218 // Use ctx.lines if available for better performance
219 let lines: Vec<&str> = if !ctx.lines.is_empty() {
220 ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
221 } else {
222 content.lines().collect()
223 };
224
225 // Create a quick lookup set for heading lines
226 // We need this for both the heading skip check AND the paragraphs check
227 let heading_lines_set: std::collections::HashSet<usize> = ctx
228 .lines
229 .iter()
230 .enumerate()
231 .filter(|(_, line)| line.heading.is_some())
232 .map(|(idx, _)| idx + 1)
233 .collect();
234
235 // Use pre-computed table blocks from context
236 // We need this for both the table skip check AND the paragraphs check
237 let table_blocks = &ctx.table_blocks;
238 let mut table_lines_set = std::collections::HashSet::new();
239 for table in table_blocks {
240 table_lines_set.insert(table.header_line + 1);
241 table_lines_set.insert(table.delimiter_line + 1);
242 for &line in &table.content_lines {
243 table_lines_set.insert(line + 1);
244 }
245 }
246
247 // Process candidate lines for line length checks
248 for &line_idx in &candidate_lines {
249 let line_number = line_idx + 1;
250 let line = lines[line_idx];
251
252 // Calculate effective length excluding unbreakable URLs
253 let effective_length = self.calculate_effective_length(line);
254
255 // Use single line length limit for all content
256 let line_limit = effective_config.line_length.get();
257
258 // Skip short lines immediately (double-check after effective length calculation)
259 if effective_length <= line_limit {
260 continue;
261 }
262
263 // Skip mkdocstrings blocks (already handled by LintContext)
264 if ctx.lines[line_idx].in_mkdocstrings {
265 continue;
266 }
267
268 // Skip various block types efficiently
269 if !effective_config.strict {
270 // Skip setext heading underlines
271 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
272 continue;
273 }
274
275 // Skip block elements according to config flags
276 // The flags mean: true = check these elements, false = skip these elements
277 // So we skip when the flag is FALSE and the line is in that element type
278 if (!effective_config.headings && heading_lines_set.contains(&line_number))
279 || (!effective_config.code_blocks
280 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
281 || (!effective_config.tables && table_lines_set.contains(&line_number))
282 || ctx.lines[line_number - 1].blockquote.is_some()
283 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
284 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
285 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
286 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
287 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
288 {
289 continue;
290 }
291
292 // Check if this is a paragraph/regular text line
293 // If paragraphs = false, skip lines that are NOT in special blocks
294 if !effective_config.paragraphs {
295 let is_special_block = heading_lines_set.contains(&line_number)
296 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
297 || table_lines_set.contains(&line_number)
298 || ctx.lines[line_number - 1].blockquote.is_some()
299 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
300 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
301 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
302 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
303 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
304 || ctx
305 .line_info(line_number)
306 .is_some_and(|info| info.in_mkdocs_container());
307
308 // Skip regular paragraph text when paragraphs = false
309 if !is_special_block {
310 continue;
311 }
312 }
313
314 // Skip lines that are only a URL, image ref, or link ref
315 if self.should_ignore_line(line, &lines, line_idx, ctx) {
316 continue;
317 }
318 }
319
320 // In sentence-per-line mode, check if this is a single long sentence
321 // If so, emit a warning without a fix (user must manually rephrase)
322 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
323 let sentences = split_into_sentences(line.trim());
324 if sentences.len() == 1 {
325 // Single sentence that's too long - warn but don't auto-fix
326 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
327
328 let (start_line, start_col, end_line, end_col) =
329 calculate_excess_range(line_number, line, line_limit);
330
331 warnings.push(LintWarning {
332 rule_name: Some(self.name().to_string()),
333 message,
334 line: start_line,
335 column: start_col,
336 end_line,
337 end_column: end_col,
338 severity: Severity::Warning,
339 fix: None, // No auto-fix for long single sentences
340 });
341 continue;
342 }
343 // Multiple sentences will be handled by paragraph-based reflow
344 continue;
345 }
346
347 // Don't provide fix for individual lines when reflow is enabled
348 // Paragraph-based fixes will be handled separately
349 let fix = None;
350
351 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
352
353 // Calculate precise character range for the excess portion
354 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
355
356 warnings.push(LintWarning {
357 rule_name: Some(self.name().to_string()),
358 message,
359 line: start_line,
360 column: start_col,
361 end_line,
362 end_column: end_col,
363 severity: Severity::Warning,
364 fix,
365 });
366 }
367
368 // If reflow is enabled, generate paragraph-based fixes
369 if effective_config.reflow {
370 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
371 // Merge paragraph warnings with line warnings, removing duplicates
372 for pw in paragraph_warnings {
373 // Remove any line warnings that overlap with this paragraph
374 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
375 warnings.push(pw);
376 }
377 }
378
379 Ok(warnings)
380 }
381
382 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
383 // For CLI usage, apply fixes from warnings
384 // LSP will use the warning-based fixes directly
385 let warnings = self.check(ctx)?;
386
387 // If there are no fixes, return content unchanged
388 if !warnings.iter().any(|w| w.fix.is_some()) {
389 return Ok(ctx.content.to_string());
390 }
391
392 // Apply warning-based fixes
393 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
394 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
395 }
396
397 fn as_any(&self) -> &dyn std::any::Any {
398 self
399 }
400
401 fn category(&self) -> RuleCategory {
402 RuleCategory::Whitespace
403 }
404
405 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
406 self.should_skip_with_config(ctx, &self.config)
407 }
408
409 fn default_config_section(&self) -> Option<(String, toml::Value)> {
410 let default_config = MD013Config::default();
411 let json_value = serde_json::to_value(&default_config).ok()?;
412 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
413
414 if let toml::Value::Table(table) = toml_value {
415 if !table.is_empty() {
416 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
417 } else {
418 None
419 }
420 } else {
421 None
422 }
423 }
424
425 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
426 let mut aliases = std::collections::HashMap::new();
427 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
428 Some(aliases)
429 }
430
431 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
432 where
433 Self: Sized,
434 {
435 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
436 // Use global line_length if rule-specific config still has default value
437 if rule_config.line_length.get() == 80 {
438 rule_config.line_length = config.global.line_length;
439 }
440 Box::new(Self::from_config_struct(rule_config))
441 }
442}
443
444impl MD013LineLength {
445 /// Generate paragraph-based fixes
446 fn generate_paragraph_fixes(
447 &self,
448 ctx: &crate::lint_context::LintContext,
449 config: &MD013Config,
450 lines: &[&str],
451 ) -> Vec<LintWarning> {
452 let mut warnings = Vec::new();
453 let line_index = LineIndex::new(ctx.content);
454
455 let mut i = 0;
456 while i < lines.len() {
457 let line_num = i + 1;
458
459 // Skip special structures (but NOT MkDocs containers - those get special handling)
460 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
461 info.in_code_block
462 || info.in_front_matter
463 || info.in_html_block
464 || info.in_html_comment
465 || info.in_esm_block
466 || info.in_jsx_expression
467 || info.in_mdx_comment
468 });
469
470 if should_skip_due_to_line_info
471 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
472 || lines[i].trim().starts_with('#')
473 || TableUtils::is_potential_table_row(lines[i])
474 || lines[i].trim().is_empty()
475 || is_horizontal_rule(lines[i].trim())
476 || is_template_directive_only(lines[i])
477 {
478 i += 1;
479 continue;
480 }
481
482 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
483 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
484 let container_start = i;
485
486 // Detect the actual indent level from the first content line
487 // (supports nested admonitions with 8+ spaces)
488 let first_line = lines[i];
489 let base_indent_len = first_line.len() - first_line.trim_start().len();
490 let base_indent: String = " ".repeat(base_indent_len);
491
492 // Collect consecutive MkDocs container paragraph lines
493 let mut container_lines: Vec<&str> = Vec::new();
494 while i < lines.len() {
495 let current_line_num = i + 1;
496 let line_info = ctx.line_info(current_line_num);
497
498 // Stop if we leave the MkDocs container
499 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
500 break;
501 }
502
503 let line = lines[i];
504
505 // Stop at paragraph boundaries within the container
506 if line.trim().is_empty() {
507 break;
508 }
509
510 // Skip list items, code blocks, headings within containers
511 if is_list_item(line.trim())
512 || line.trim().starts_with("```")
513 || line.trim().starts_with("~~~")
514 || line.trim().starts_with('#')
515 {
516 break;
517 }
518
519 container_lines.push(line);
520 i += 1;
521 }
522
523 if container_lines.is_empty() {
524 continue;
525 }
526
527 // Strip the base indent from each line and join for reflow
528 let stripped_lines: Vec<&str> = container_lines
529 .iter()
530 .map(|line| {
531 if line.starts_with(&base_indent) {
532 &line[base_indent_len..]
533 } else {
534 line.trim_start()
535 }
536 })
537 .collect();
538 let paragraph_text = stripped_lines.join(" ");
539
540 // Check if reflow is needed
541 let needs_reflow = match config.reflow_mode {
542 ReflowMode::Normalize => container_lines.len() > 1,
543 ReflowMode::SentencePerLine => {
544 let sentences = split_into_sentences(¶graph_text);
545 sentences.len() > 1 || container_lines.len() > 1
546 }
547 ReflowMode::Default => container_lines
548 .iter()
549 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
550 };
551
552 if !needs_reflow {
553 continue;
554 }
555
556 // Calculate byte range for this container paragraph
557 let start_range = line_index.whole_line_range(container_start + 1);
558 let end_line = container_start + container_lines.len() - 1;
559 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
560 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
561 } else {
562 line_index.whole_line_range(end_line + 1)
563 };
564 let byte_range = start_range.start..end_range.end;
565
566 // Reflow with adjusted line length (accounting for the 4-space indent)
567 let reflow_line_length = if config.line_length.is_unlimited() {
568 usize::MAX
569 } else {
570 config.line_length.get().saturating_sub(base_indent_len).max(1)
571 };
572 let reflow_options = crate::utils::text_reflow::ReflowOptions {
573 line_length: reflow_line_length,
574 break_on_sentences: true,
575 preserve_breaks: false,
576 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
577 abbreviations: config.abbreviations_for_reflow(),
578 };
579 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
580
581 // Re-add the 4-space indent to each reflowed line
582 let reflowed_with_indent: Vec<String> =
583 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
584 let reflowed_text = reflowed_with_indent.join("\n");
585
586 // Preserve trailing newline
587 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
588 format!("{reflowed_text}\n")
589 } else {
590 reflowed_text
591 };
592
593 // Only generate a warning if the replacement is different
594 let original_text = &ctx.content[byte_range.clone()];
595 if original_text != replacement {
596 warnings.push(LintWarning {
597 rule_name: Some(self.name().to_string()),
598 message: format!(
599 "Line length {} exceeds {} characters (in MkDocs container)",
600 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
601 config.line_length.get()
602 ),
603 line: container_start + 1,
604 column: 1,
605 end_line: end_line + 1,
606 end_column: lines[end_line].len() + 1,
607 severity: Severity::Warning,
608 fix: Some(crate::rule::Fix {
609 range: byte_range,
610 replacement,
611 }),
612 });
613 }
614 continue;
615 }
616
617 // Helper function to detect semantic line markers
618 let is_semantic_line = |content: &str| -> bool {
619 let trimmed = content.trim_start();
620 let semantic_markers = [
621 "NOTE:",
622 "WARNING:",
623 "IMPORTANT:",
624 "CAUTION:",
625 "TIP:",
626 "DANGER:",
627 "HINT:",
628 "INFO:",
629 ];
630 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
631 };
632
633 // Helper function to detect fence markers (opening or closing)
634 let is_fence_marker = |content: &str| -> bool {
635 let trimmed = content.trim_start();
636 trimmed.starts_with("```") || trimmed.starts_with("~~~")
637 };
638
639 // Check if this is a list item - handle it specially
640 let trimmed = lines[i].trim();
641 if is_list_item(trimmed) {
642 // Collect the entire list item including continuation lines
643 let list_start = i;
644 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
645 let marker_len = marker.len();
646
647 // Track lines and their types (content, code block, fence, nested list)
648 #[derive(Clone)]
649 enum LineType {
650 Content(String),
651 CodeBlock(String, usize), // content and original indent
652 NestedListItem(String, usize), // full line content and original indent
653 SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
654 SnippetLine(String), // MkDocs Snippets delimiters (-8<-) that must stay on their own line
655 Empty,
656 }
657
658 let mut actual_indent: Option<usize> = None;
659 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
660 i += 1;
661
662 // Collect continuation lines using ctx.lines for metadata
663 while i < lines.len() {
664 let line_info = &ctx.lines[i];
665
666 // Use pre-computed is_blank from ctx
667 if line_info.is_blank {
668 // Empty line - check if next line is indented (part of list item)
669 if i + 1 < lines.len() {
670 let next_info = &ctx.lines[i + 1];
671
672 // Check if next line is indented enough to be continuation
673 if !next_info.is_blank && next_info.indent >= marker_len {
674 // This blank line is between paragraphs/blocks in the list item
675 list_item_lines.push(LineType::Empty);
676 i += 1;
677 continue;
678 }
679 }
680 // No indented line after blank, end of list item
681 break;
682 }
683
684 // Use pre-computed indent from ctx
685 let indent = line_info.indent;
686
687 // Valid continuation must be indented at least marker_len
688 if indent >= marker_len {
689 let trimmed = line_info.content(ctx.content).trim();
690
691 // Use pre-computed in_code_block from ctx
692 if line_info.in_code_block {
693 list_item_lines.push(LineType::CodeBlock(
694 line_info.content(ctx.content)[indent..].to_string(),
695 indent,
696 ));
697 i += 1;
698 continue;
699 }
700
701 // Check if this is a SIBLING list item (breaks parent)
702 // Nested lists are indented >= marker_len and are PART of the parent item
703 // Siblings are at indent < marker_len (at or before parent marker)
704 if is_list_item(trimmed) && indent < marker_len {
705 // This is a sibling item at same or higher level - end parent item
706 break;
707 }
708
709 // Check if this is a NESTED list item marker
710 // Nested lists should be processed separately UNLESS they're part of a
711 // multi-paragraph list item (indicated by a blank line before them OR
712 // it's a continuation of an already-started nested list)
713 if is_list_item(trimmed) && indent >= marker_len {
714 // Check if there was a blank line before this (multi-paragraph context)
715 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
716
717 // Check if we've already seen nested list content (another nested item)
718 let has_nested_content = list_item_lines.iter().any(|line| {
719 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
720 || matches!(line, LineType::NestedListItem(_, _))
721 });
722
723 if !has_blank_before && !has_nested_content {
724 // Single-paragraph context with no prior nested items: starts a new item
725 // End parent collection; nested list will be processed next
726 break;
727 }
728 // else: multi-paragraph context or continuation of nested list, keep collecting
729 // Mark this as a nested list item to preserve its structure
730 list_item_lines.push(LineType::NestedListItem(
731 line_info.content(ctx.content)[indent..].to_string(),
732 indent,
733 ));
734 i += 1;
735 continue;
736 }
737
738 // Normal continuation: marker_len to marker_len+3
739 if indent <= marker_len + 3 {
740 // Set actual_indent from first non-code continuation if not set
741 if actual_indent.is_none() {
742 actual_indent = Some(indent);
743 }
744
745 // Extract content (remove indentation and trailing whitespace)
746 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
747 // See: https://github.com/rvben/rumdl/issues/76
748 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
749
750 // Check if this is a fence marker (opening or closing)
751 // These should be treated as code block lines, not paragraph content
752 if is_fence_marker(&content) {
753 list_item_lines.push(LineType::CodeBlock(content, indent));
754 }
755 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
756 else if is_semantic_line(&content) {
757 list_item_lines.push(LineType::SemanticLine(content));
758 }
759 // Check if this is a snippet block delimiter (-8<- or --8<--)
760 // These must be preserved on their own lines for MkDocs Snippets extension
761 else if is_snippet_block_delimiter(&content) {
762 list_item_lines.push(LineType::SnippetLine(content));
763 } else {
764 list_item_lines.push(LineType::Content(content));
765 }
766 i += 1;
767 } else {
768 // indent >= marker_len + 4: indented code block
769 list_item_lines.push(LineType::CodeBlock(
770 line_info.content(ctx.content)[indent..].to_string(),
771 indent,
772 ));
773 i += 1;
774 }
775 } else {
776 // Not indented enough, end of list item
777 break;
778 }
779 }
780
781 // Use detected indent or fallback to marker length
782 let indent_size = actual_indent.unwrap_or(marker_len);
783 let expected_indent = " ".repeat(indent_size);
784
785 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
786 #[derive(Clone)]
787 enum Block {
788 Paragraph(Vec<String>),
789 Code {
790 lines: Vec<(String, usize)>, // (content, indent) pairs
791 has_preceding_blank: bool, // Whether there was a blank line before this block
792 },
793 NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
794 SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
795 SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
796 Html {
797 lines: Vec<String>, // HTML content preserved exactly as-is
798 has_preceding_blank: bool, // Whether there was a blank line before this block
799 },
800 }
801
802 // HTML tag detection helpers
803 // Block-level HTML tags that should trigger HTML block detection
804 const BLOCK_LEVEL_TAGS: &[&str] = &[
805 "div",
806 "details",
807 "summary",
808 "section",
809 "article",
810 "header",
811 "footer",
812 "nav",
813 "aside",
814 "main",
815 "table",
816 "thead",
817 "tbody",
818 "tfoot",
819 "tr",
820 "td",
821 "th",
822 "ul",
823 "ol",
824 "li",
825 "dl",
826 "dt",
827 "dd",
828 "pre",
829 "blockquote",
830 "figure",
831 "figcaption",
832 "form",
833 "fieldset",
834 "legend",
835 "hr",
836 "p",
837 "h1",
838 "h2",
839 "h3",
840 "h4",
841 "h5",
842 "h6",
843 "style",
844 "script",
845 "noscript",
846 ];
847
848 fn is_block_html_opening_tag(line: &str) -> Option<String> {
849 let trimmed = line.trim();
850
851 // Check for HTML comments
852 if trimmed.starts_with("<!--") {
853 return Some("!--".to_string());
854 }
855
856 // Check for opening tags
857 if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
858 // Extract tag name from <tagname ...> or <tagname>
859 let after_bracket = &trimmed[1..];
860 if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
861 let tag_name = after_bracket[..end].to_lowercase();
862
863 // Only treat as block if it's a known block-level tag
864 if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
865 return Some(tag_name);
866 }
867 }
868 }
869 None
870 }
871
872 fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
873 let trimmed = line.trim();
874
875 // Special handling for HTML comments
876 if tag_name == "!--" {
877 return trimmed.ends_with("-->");
878 }
879
880 // Check for closing tags: </tagname> or </tagname ...>
881 trimmed.starts_with(&format!("</{tag_name}>"))
882 || trimmed.starts_with(&format!("</{tag_name} "))
883 || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
884 }
885
886 fn is_self_closing_tag(line: &str) -> bool {
887 let trimmed = line.trim();
888 trimmed.ends_with("/>")
889 }
890
891 let mut blocks: Vec<Block> = Vec::new();
892 let mut current_paragraph: Vec<String> = Vec::new();
893 let mut current_code_block: Vec<(String, usize)> = Vec::new();
894 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
895 let mut current_html_block: Vec<String> = Vec::new();
896 let mut html_tag_stack: Vec<String> = Vec::new();
897 let mut in_code = false;
898 let mut in_nested_list = false;
899 let mut in_html_block = false;
900 let mut had_preceding_blank = false; // Track if we just saw an empty line
901 let mut code_block_has_preceding_blank = false; // Track blank before current code block
902 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
903
904 for line in &list_item_lines {
905 match line {
906 LineType::Empty => {
907 if in_code {
908 current_code_block.push((String::new(), 0));
909 } else if in_nested_list {
910 current_nested_list.push((String::new(), 0));
911 } else if in_html_block {
912 // Allow blank lines inside HTML blocks
913 current_html_block.push(String::new());
914 } else if !current_paragraph.is_empty() {
915 blocks.push(Block::Paragraph(current_paragraph.clone()));
916 current_paragraph.clear();
917 }
918 // Mark that we saw a blank line
919 had_preceding_blank = true;
920 }
921 LineType::Content(content) => {
922 // Check if we're currently in an HTML block
923 if in_html_block {
924 current_html_block.push(content.clone());
925
926 // Check if this line closes any open HTML tags
927 if let Some(last_tag) = html_tag_stack.last() {
928 if is_html_closing_tag(content, last_tag) {
929 html_tag_stack.pop();
930
931 // If stack is empty, HTML block is complete
932 if html_tag_stack.is_empty() {
933 blocks.push(Block::Html {
934 lines: current_html_block.clone(),
935 has_preceding_blank: html_block_has_preceding_blank,
936 });
937 current_html_block.clear();
938 in_html_block = false;
939 }
940 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
941 // Nested opening tag within HTML block
942 if !is_self_closing_tag(content) {
943 html_tag_stack.push(new_tag);
944 }
945 }
946 }
947 had_preceding_blank = false;
948 } else {
949 // Not in HTML block - check if this line starts one
950 if let Some(tag_name) = is_block_html_opening_tag(content) {
951 // Flush current paragraph before starting HTML block
952 if in_code {
953 blocks.push(Block::Code {
954 lines: current_code_block.clone(),
955 has_preceding_blank: code_block_has_preceding_blank,
956 });
957 current_code_block.clear();
958 in_code = false;
959 } else if in_nested_list {
960 blocks.push(Block::NestedList(current_nested_list.clone()));
961 current_nested_list.clear();
962 in_nested_list = false;
963 } else if !current_paragraph.is_empty() {
964 blocks.push(Block::Paragraph(current_paragraph.clone()));
965 current_paragraph.clear();
966 }
967
968 // Start new HTML block
969 in_html_block = true;
970 html_block_has_preceding_blank = had_preceding_blank;
971 current_html_block.push(content.clone());
972
973 // Check if it's self-closing or needs a closing tag
974 if is_self_closing_tag(content) {
975 // Self-closing tag - complete the HTML block immediately
976 blocks.push(Block::Html {
977 lines: current_html_block.clone(),
978 has_preceding_blank: html_block_has_preceding_blank,
979 });
980 current_html_block.clear();
981 in_html_block = false;
982 } else {
983 // Regular opening tag - push to stack
984 html_tag_stack.push(tag_name);
985 }
986 } else {
987 // Regular content line - add to paragraph
988 if in_code {
989 // Switching from code to content
990 blocks.push(Block::Code {
991 lines: current_code_block.clone(),
992 has_preceding_blank: code_block_has_preceding_blank,
993 });
994 current_code_block.clear();
995 in_code = false;
996 } else if in_nested_list {
997 // Switching from nested list to content
998 blocks.push(Block::NestedList(current_nested_list.clone()));
999 current_nested_list.clear();
1000 in_nested_list = false;
1001 }
1002 current_paragraph.push(content.clone());
1003 }
1004 had_preceding_blank = false; // Reset after content
1005 }
1006 }
1007 LineType::CodeBlock(content, indent) => {
1008 if in_nested_list {
1009 // Switching from nested list to code
1010 blocks.push(Block::NestedList(current_nested_list.clone()));
1011 current_nested_list.clear();
1012 in_nested_list = false;
1013 } else if in_html_block {
1014 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1015 blocks.push(Block::Html {
1016 lines: current_html_block.clone(),
1017 has_preceding_blank: html_block_has_preceding_blank,
1018 });
1019 current_html_block.clear();
1020 html_tag_stack.clear();
1021 in_html_block = false;
1022 }
1023 if !in_code {
1024 // Switching from content to code
1025 if !current_paragraph.is_empty() {
1026 blocks.push(Block::Paragraph(current_paragraph.clone()));
1027 current_paragraph.clear();
1028 }
1029 in_code = true;
1030 // Record whether there was a blank line before this code block
1031 code_block_has_preceding_blank = had_preceding_blank;
1032 }
1033 current_code_block.push((content.clone(), *indent));
1034 had_preceding_blank = false; // Reset after code
1035 }
1036 LineType::NestedListItem(content, indent) => {
1037 if in_code {
1038 // Switching from code to nested list
1039 blocks.push(Block::Code {
1040 lines: current_code_block.clone(),
1041 has_preceding_blank: code_block_has_preceding_blank,
1042 });
1043 current_code_block.clear();
1044 in_code = false;
1045 } else if in_html_block {
1046 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1047 blocks.push(Block::Html {
1048 lines: current_html_block.clone(),
1049 has_preceding_blank: html_block_has_preceding_blank,
1050 });
1051 current_html_block.clear();
1052 html_tag_stack.clear();
1053 in_html_block = false;
1054 }
1055 if !in_nested_list {
1056 // Switching from content to nested list
1057 if !current_paragraph.is_empty() {
1058 blocks.push(Block::Paragraph(current_paragraph.clone()));
1059 current_paragraph.clear();
1060 }
1061 in_nested_list = true;
1062 }
1063 current_nested_list.push((content.clone(), *indent));
1064 had_preceding_blank = false; // Reset after nested list
1065 }
1066 LineType::SemanticLine(content) => {
1067 // Semantic lines are standalone - flush any current block and add as separate block
1068 if in_code {
1069 blocks.push(Block::Code {
1070 lines: current_code_block.clone(),
1071 has_preceding_blank: code_block_has_preceding_blank,
1072 });
1073 current_code_block.clear();
1074 in_code = false;
1075 } else if in_nested_list {
1076 blocks.push(Block::NestedList(current_nested_list.clone()));
1077 current_nested_list.clear();
1078 in_nested_list = false;
1079 } else if in_html_block {
1080 blocks.push(Block::Html {
1081 lines: current_html_block.clone(),
1082 has_preceding_blank: html_block_has_preceding_blank,
1083 });
1084 current_html_block.clear();
1085 html_tag_stack.clear();
1086 in_html_block = false;
1087 } else if !current_paragraph.is_empty() {
1088 blocks.push(Block::Paragraph(current_paragraph.clone()));
1089 current_paragraph.clear();
1090 }
1091 // Add semantic line as its own block
1092 blocks.push(Block::SemanticLine(content.clone()));
1093 had_preceding_blank = false; // Reset after semantic line
1094 }
1095 LineType::SnippetLine(content) => {
1096 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1097 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1098 if in_code {
1099 blocks.push(Block::Code {
1100 lines: current_code_block.clone(),
1101 has_preceding_blank: code_block_has_preceding_blank,
1102 });
1103 current_code_block.clear();
1104 in_code = false;
1105 } else if in_nested_list {
1106 blocks.push(Block::NestedList(current_nested_list.clone()));
1107 current_nested_list.clear();
1108 in_nested_list = false;
1109 } else if in_html_block {
1110 blocks.push(Block::Html {
1111 lines: current_html_block.clone(),
1112 has_preceding_blank: html_block_has_preceding_blank,
1113 });
1114 current_html_block.clear();
1115 html_tag_stack.clear();
1116 in_html_block = false;
1117 } else if !current_paragraph.is_empty() {
1118 blocks.push(Block::Paragraph(current_paragraph.clone()));
1119 current_paragraph.clear();
1120 }
1121 // Add snippet line as its own block
1122 blocks.push(Block::SnippetLine(content.clone()));
1123 had_preceding_blank = false;
1124 }
1125 }
1126 }
1127
1128 // Push remaining block
1129 if in_code && !current_code_block.is_empty() {
1130 blocks.push(Block::Code {
1131 lines: current_code_block,
1132 has_preceding_blank: code_block_has_preceding_blank,
1133 });
1134 } else if in_nested_list && !current_nested_list.is_empty() {
1135 blocks.push(Block::NestedList(current_nested_list));
1136 } else if in_html_block && !current_html_block.is_empty() {
1137 // If we still have an unclosed HTML block, push it anyway
1138 // (malformed HTML - missing closing tag)
1139 blocks.push(Block::Html {
1140 lines: current_html_block,
1141 has_preceding_blank: html_block_has_preceding_blank,
1142 });
1143 } else if !current_paragraph.is_empty() {
1144 blocks.push(Block::Paragraph(current_paragraph));
1145 }
1146
1147 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1148 let content_lines: Vec<String> = list_item_lines
1149 .iter()
1150 .filter_map(|line| {
1151 if let LineType::Content(s) = line {
1152 Some(s.clone())
1153 } else {
1154 None
1155 }
1156 })
1157 .collect();
1158
1159 // Check if we need to reflow this list item
1160 // We check the combined content to see if it exceeds length limits
1161 let combined_content = content_lines.join(" ").trim().to_string();
1162 let full_line = format!("{marker}{combined_content}");
1163
1164 // Helper to check if we should reflow in normalize mode
1165 let should_normalize = || {
1166 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1167 // DO normalize if it has plain text content that spans multiple lines
1168 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1169 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1170 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1171 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1172 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1173
1174 // If we have nested lists, code blocks, semantic lines, or snippet lines but no paragraphs, don't normalize
1175 if (has_nested_lists || has_code_blocks || has_semantic_lines || has_snippet_lines)
1176 && !has_paragraphs
1177 {
1178 return false;
1179 }
1180
1181 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1182 if has_paragraphs {
1183 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1184 if paragraph_count > 1 {
1185 // Multiple paragraph blocks should be normalized
1186 return true;
1187 }
1188
1189 // Single paragraph block: normalize if it has multiple content lines
1190 if content_lines.len() > 1 {
1191 return true;
1192 }
1193 }
1194
1195 false
1196 };
1197
1198 let needs_reflow = match config.reflow_mode {
1199 ReflowMode::Normalize => {
1200 // Only reflow if:
1201 // 1. The combined line would exceed the limit, OR
1202 // 2. The list item should be normalized (has multi-line plain text)
1203 let combined_length = self.calculate_effective_length(&full_line);
1204 if combined_length > config.line_length.get() {
1205 true
1206 } else {
1207 should_normalize()
1208 }
1209 }
1210 ReflowMode::SentencePerLine => {
1211 // Check if list item has multiple sentences
1212 let sentences = split_into_sentences(&combined_content);
1213 sentences.len() > 1
1214 }
1215 ReflowMode::Default => {
1216 // In default mode, only reflow if any individual line exceeds limit
1217 // Check the original lines, not the combined content
1218 (list_start..i)
1219 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1220 }
1221 };
1222
1223 if needs_reflow {
1224 let start_range = line_index.whole_line_range(list_start + 1);
1225 let end_line = i - 1;
1226 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1227 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1228 } else {
1229 line_index.whole_line_range(end_line + 1)
1230 };
1231 let byte_range = start_range.start..end_range.end;
1232
1233 // Reflow each block (paragraphs only, preserve code blocks)
1234 // When line_length = 0 (no limit), use a very large value for reflow
1235 let reflow_line_length = if config.line_length.is_unlimited() {
1236 usize::MAX
1237 } else {
1238 config.line_length.get().saturating_sub(indent_size).max(1)
1239 };
1240 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1241 line_length: reflow_line_length,
1242 break_on_sentences: true,
1243 preserve_breaks: false,
1244 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1245 abbreviations: config.abbreviations_for_reflow(),
1246 };
1247
1248 let mut result: Vec<String> = Vec::new();
1249 let mut is_first_block = true;
1250
1251 for (block_idx, block) in blocks.iter().enumerate() {
1252 match block {
1253 Block::Paragraph(para_lines) => {
1254 // Split the paragraph into segments at hard break boundaries
1255 // Each segment can be reflowed independently
1256 let segments = split_into_segments(para_lines);
1257
1258 for (segment_idx, segment) in segments.iter().enumerate() {
1259 // Check if this segment ends with a hard break and what type
1260 let hard_break_type = segment.last().and_then(|line| {
1261 let line = line.strip_suffix('\r').unwrap_or(line);
1262 if line.ends_with('\\') {
1263 Some("\\")
1264 } else if line.ends_with(" ") {
1265 Some(" ")
1266 } else {
1267 None
1268 }
1269 });
1270
1271 // Join and reflow the segment (removing the hard break marker for processing)
1272 let segment_for_reflow: Vec<String> = segment
1273 .iter()
1274 .map(|line| {
1275 // Strip hard break marker (2 spaces or backslash) for reflow processing
1276 if line.ends_with('\\') {
1277 line[..line.len() - 1].trim_end().to_string()
1278 } else if line.ends_with(" ") {
1279 line[..line.len() - 2].trim_end().to_string()
1280 } else {
1281 line.clone()
1282 }
1283 })
1284 .collect();
1285
1286 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1287 if !segment_text.is_empty() {
1288 let reflowed =
1289 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1290
1291 if is_first_block && segment_idx == 0 {
1292 // First segment of first block starts with marker
1293 result.push(format!("{marker}{}", reflowed[0]));
1294 for line in reflowed.iter().skip(1) {
1295 result.push(format!("{expected_indent}{line}"));
1296 }
1297 is_first_block = false;
1298 } else {
1299 // Subsequent segments
1300 for line in reflowed {
1301 result.push(format!("{expected_indent}{line}"));
1302 }
1303 }
1304
1305 // If this segment had a hard break, add it back to the last line
1306 // Preserve the original hard break format (backslash or two spaces)
1307 if let Some(break_marker) = hard_break_type
1308 && let Some(last_line) = result.last_mut()
1309 {
1310 last_line.push_str(break_marker);
1311 }
1312 }
1313 }
1314
1315 // Add blank line after paragraph block if there's a next block
1316 // BUT: check if next block is a code block that doesn't want a preceding blank
1317 // Also don't add blank lines before snippet lines (they should stay tight)
1318 if block_idx < blocks.len() - 1 {
1319 let next_block = &blocks[block_idx + 1];
1320 let should_add_blank = match next_block {
1321 Block::Code {
1322 has_preceding_blank, ..
1323 } => *has_preceding_blank,
1324 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1325 _ => true, // For all other blocks, add blank line
1326 };
1327 if should_add_blank {
1328 result.push(String::new());
1329 }
1330 }
1331 }
1332 Block::Code {
1333 lines: code_lines,
1334 has_preceding_blank: _,
1335 } => {
1336 // Preserve code blocks as-is with original indentation
1337 // NOTE: Blank line before code block is handled by the previous block
1338 // (see paragraph block's logic above)
1339
1340 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1341 if is_first_block && idx == 0 {
1342 // First line of first block gets marker
1343 result.push(format!(
1344 "{marker}{}",
1345 " ".repeat(orig_indent - marker_len) + content
1346 ));
1347 is_first_block = false;
1348 } else if content.is_empty() {
1349 result.push(String::new());
1350 } else {
1351 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1352 }
1353 }
1354 }
1355 Block::NestedList(nested_items) => {
1356 // Preserve nested list items as-is with original indentation
1357 if !is_first_block {
1358 result.push(String::new());
1359 }
1360
1361 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1362 if is_first_block && idx == 0 {
1363 // First line of first block gets marker
1364 result.push(format!(
1365 "{marker}{}",
1366 " ".repeat(orig_indent - marker_len) + content
1367 ));
1368 is_first_block = false;
1369 } else if content.is_empty() {
1370 result.push(String::new());
1371 } else {
1372 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1373 }
1374 }
1375
1376 // Add blank line after nested list if there's a next block
1377 // Check if next block is a code block that doesn't want a preceding blank
1378 if block_idx < blocks.len() - 1 {
1379 let next_block = &blocks[block_idx + 1];
1380 let should_add_blank = match next_block {
1381 Block::Code {
1382 has_preceding_blank, ..
1383 } => *has_preceding_blank,
1384 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1385 _ => true, // For all other blocks, add blank line
1386 };
1387 if should_add_blank {
1388 result.push(String::new());
1389 }
1390 }
1391 }
1392 Block::SemanticLine(content) => {
1393 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1394 // Add blank line before if not first block
1395 if !is_first_block {
1396 result.push(String::new());
1397 }
1398
1399 if is_first_block {
1400 // First block starts with marker
1401 result.push(format!("{marker}{content}"));
1402 is_first_block = false;
1403 } else {
1404 // Subsequent blocks use expected indent
1405 result.push(format!("{expected_indent}{content}"));
1406 }
1407
1408 // Add blank line after semantic line if there's a next block
1409 // Check if next block is a code block that doesn't want a preceding blank
1410 if block_idx < blocks.len() - 1 {
1411 let next_block = &blocks[block_idx + 1];
1412 let should_add_blank = match next_block {
1413 Block::Code {
1414 has_preceding_blank, ..
1415 } => *has_preceding_blank,
1416 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1417 _ => true, // For all other blocks, add blank line
1418 };
1419 if should_add_blank {
1420 result.push(String::new());
1421 }
1422 }
1423 }
1424 Block::SnippetLine(content) => {
1425 // Preserve snippet delimiters (-8<-) as-is on their own line
1426 // Unlike semantic lines, snippet lines don't add extra blank lines
1427 if is_first_block {
1428 // First block starts with marker
1429 result.push(format!("{marker}{content}"));
1430 is_first_block = false;
1431 } else {
1432 // Subsequent blocks use expected indent
1433 result.push(format!("{expected_indent}{content}"));
1434 }
1435 // No blank lines added before or after snippet delimiters
1436 }
1437 Block::Html {
1438 lines: html_lines,
1439 has_preceding_blank: _,
1440 } => {
1441 // Preserve HTML blocks exactly as-is with original indentation
1442 // NOTE: Blank line before HTML block is handled by the previous block
1443
1444 for (idx, line) in html_lines.iter().enumerate() {
1445 if is_first_block && idx == 0 {
1446 // First line of first block gets marker
1447 result.push(format!("{marker}{line}"));
1448 is_first_block = false;
1449 } else if line.is_empty() {
1450 // Preserve blank lines inside HTML blocks
1451 result.push(String::new());
1452 } else {
1453 // Preserve lines with their original content (already includes indentation)
1454 result.push(format!("{expected_indent}{line}"));
1455 }
1456 }
1457
1458 // Add blank line after HTML block if there's a next block
1459 if block_idx < blocks.len() - 1 {
1460 let next_block = &blocks[block_idx + 1];
1461 let should_add_blank = match next_block {
1462 Block::Code {
1463 has_preceding_blank, ..
1464 } => *has_preceding_blank,
1465 Block::Html {
1466 has_preceding_blank, ..
1467 } => *has_preceding_blank,
1468 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1469 _ => true, // For all other blocks, add blank line
1470 };
1471 if should_add_blank {
1472 result.push(String::new());
1473 }
1474 }
1475 }
1476 }
1477 }
1478
1479 let reflowed_text = result.join("\n");
1480
1481 // Preserve trailing newline
1482 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1483 format!("{reflowed_text}\n")
1484 } else {
1485 reflowed_text
1486 };
1487
1488 // Get the original text to compare
1489 let original_text = &ctx.content[byte_range.clone()];
1490
1491 // Only generate a warning if the replacement is different from the original
1492 if original_text != replacement {
1493 // Generate an appropriate message based on why reflow is needed
1494 let message = match config.reflow_mode {
1495 ReflowMode::SentencePerLine => {
1496 let num_sentences = split_into_sentences(&combined_content).len();
1497 let num_lines = content_lines.len();
1498 if num_lines == 1 {
1499 // Single line with multiple sentences
1500 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1501 } else {
1502 // Multiple lines - could be split sentences or mixed
1503 format!(
1504 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1505 )
1506 }
1507 }
1508 ReflowMode::Normalize => {
1509 let combined_length = self.calculate_effective_length(&full_line);
1510 if combined_length > config.line_length.get() {
1511 format!(
1512 "Line length {} exceeds {} characters",
1513 combined_length,
1514 config.line_length.get()
1515 )
1516 } else {
1517 "Multi-line content can be normalized".to_string()
1518 }
1519 }
1520 ReflowMode::Default => {
1521 let combined_length = self.calculate_effective_length(&full_line);
1522 format!(
1523 "Line length {} exceeds {} characters",
1524 combined_length,
1525 config.line_length.get()
1526 )
1527 }
1528 };
1529
1530 warnings.push(LintWarning {
1531 rule_name: Some(self.name().to_string()),
1532 message,
1533 line: list_start + 1,
1534 column: 1,
1535 end_line: end_line + 1,
1536 end_column: lines[end_line].len() + 1,
1537 severity: Severity::Warning,
1538 fix: Some(crate::rule::Fix {
1539 range: byte_range,
1540 replacement,
1541 }),
1542 });
1543 }
1544 }
1545 continue;
1546 }
1547
1548 // Found start of a paragraph - collect all lines in it
1549 let paragraph_start = i;
1550 let mut paragraph_lines = vec![lines[i]];
1551 i += 1;
1552
1553 while i < lines.len() {
1554 let next_line = lines[i];
1555 let next_line_num = i + 1;
1556 let next_trimmed = next_line.trim();
1557
1558 // Stop at paragraph boundaries
1559 if next_trimmed.is_empty()
1560 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1561 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1562 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1563 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1564 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1565 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1566 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1567 || ctx
1568 .line_info(next_line_num)
1569 .is_some_and(|info| info.in_mkdocs_container())
1570 || (next_line_num > 0
1571 && next_line_num <= ctx.lines.len()
1572 && ctx.lines[next_line_num - 1].blockquote.is_some())
1573 || next_trimmed.starts_with('#')
1574 || TableUtils::is_potential_table_row(next_line)
1575 || is_list_item(next_trimmed)
1576 || is_horizontal_rule(next_trimmed)
1577 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1578 || is_template_directive_only(next_line)
1579 || is_standalone_attr_list(next_line)
1580 || is_snippet_block_delimiter(next_line)
1581 {
1582 break;
1583 }
1584
1585 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1586 if i > 0 && has_hard_break(lines[i - 1]) {
1587 // Don't include lines after hard breaks in the same paragraph
1588 break;
1589 }
1590
1591 paragraph_lines.push(next_line);
1592 i += 1;
1593 }
1594
1595 // Combine paragraph lines into a single string for processing
1596 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1597 let paragraph_text = paragraph_lines.join(" ");
1598
1599 // Skip reflowing if this paragraph contains definition list items
1600 // Definition lists are multi-line structures that should not be joined
1601 let contains_definition_list = paragraph_lines
1602 .iter()
1603 .any(|line| crate::utils::is_definition_list_item(line));
1604
1605 if contains_definition_list {
1606 // Don't reflow definition lists - skip this paragraph
1607 i = paragraph_start + paragraph_lines.len();
1608 continue;
1609 }
1610
1611 // Skip reflowing if this paragraph contains MkDocs Snippets markers
1612 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1613 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1614
1615 if contains_snippets {
1616 // Don't reflow Snippets blocks - skip this paragraph
1617 i = paragraph_start + paragraph_lines.len();
1618 continue;
1619 }
1620
1621 // Check if this paragraph needs reflowing
1622 let needs_reflow = match config.reflow_mode {
1623 ReflowMode::Normalize => {
1624 // In normalize mode, reflow multi-line paragraphs
1625 paragraph_lines.len() > 1
1626 }
1627 ReflowMode::SentencePerLine => {
1628 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1629 // Note: we check the joined text because sentences can span multiple lines
1630 let sentences = split_into_sentences(¶graph_text);
1631
1632 // Always reflow if multiple sentences on one line
1633 if sentences.len() > 1 {
1634 true
1635 } else if paragraph_lines.len() > 1 {
1636 // For single-sentence paragraphs spanning multiple lines:
1637 // Reflow if they COULD fit on one line (respecting line-length constraint)
1638 if config.line_length.is_unlimited() {
1639 // No line-length constraint - always join single sentences
1640 true
1641 } else {
1642 // Only join if it fits within line-length
1643 let effective_length = self.calculate_effective_length(¶graph_text);
1644 effective_length <= config.line_length.get()
1645 }
1646 } else {
1647 false
1648 }
1649 }
1650 ReflowMode::Default => {
1651 // In default mode, only reflow if lines exceed limit
1652 paragraph_lines
1653 .iter()
1654 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1655 }
1656 };
1657
1658 if needs_reflow {
1659 // Calculate byte range for this paragraph
1660 // Use whole_line_range for each line and combine
1661 let start_range = line_index.whole_line_range(paragraph_start + 1);
1662 let end_line = paragraph_start + paragraph_lines.len() - 1;
1663
1664 // For the last line, we want to preserve any trailing newline
1665 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1666 // Last line without trailing newline - use line_text_range
1667 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1668 } else {
1669 // Not the last line or has trailing newline - use whole_line_range
1670 line_index.whole_line_range(end_line + 1)
1671 };
1672
1673 let byte_range = start_range.start..end_range.end;
1674
1675 // Check if the paragraph ends with a hard break and what type
1676 let hard_break_type = paragraph_lines.last().and_then(|line| {
1677 let line = line.strip_suffix('\r').unwrap_or(line);
1678 if line.ends_with('\\') {
1679 Some("\\")
1680 } else if line.ends_with(" ") {
1681 Some(" ")
1682 } else {
1683 None
1684 }
1685 });
1686
1687 // Reflow the paragraph
1688 // When line_length = 0 (no limit), use a very large value for reflow
1689 let reflow_line_length = if config.line_length.is_unlimited() {
1690 usize::MAX
1691 } else {
1692 config.line_length.get()
1693 };
1694 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1695 line_length: reflow_line_length,
1696 break_on_sentences: true,
1697 preserve_breaks: false,
1698 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1699 abbreviations: config.abbreviations_for_reflow(),
1700 };
1701 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1702
1703 // If the original paragraph ended with a hard break, preserve it
1704 // Preserve the original hard break format (backslash or two spaces)
1705 if let Some(break_marker) = hard_break_type
1706 && !reflowed.is_empty()
1707 {
1708 let last_idx = reflowed.len() - 1;
1709 if !has_hard_break(&reflowed[last_idx]) {
1710 reflowed[last_idx].push_str(break_marker);
1711 }
1712 }
1713
1714 let reflowed_text = reflowed.join("\n");
1715
1716 // Preserve trailing newline if the original paragraph had one
1717 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1718 format!("{reflowed_text}\n")
1719 } else {
1720 reflowed_text
1721 };
1722
1723 // Get the original text to compare
1724 let original_text = &ctx.content[byte_range.clone()];
1725
1726 // Only generate a warning if the replacement is different from the original
1727 if original_text != replacement {
1728 // Create warning with actual fix
1729 // In default mode, report the specific line that violates
1730 // In normalize mode, report the whole paragraph
1731 // In sentence-per-line mode, report the entire paragraph
1732 let (warning_line, warning_end_line) = match config.reflow_mode {
1733 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1734 ReflowMode::SentencePerLine => {
1735 // Highlight the entire paragraph that needs reformatting
1736 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1737 }
1738 ReflowMode::Default => {
1739 // Find the first line that exceeds the limit
1740 let mut violating_line = paragraph_start;
1741 for (idx, line) in paragraph_lines.iter().enumerate() {
1742 if self.calculate_effective_length(line) > config.line_length.get() {
1743 violating_line = paragraph_start + idx;
1744 break;
1745 }
1746 }
1747 (violating_line + 1, violating_line + 1)
1748 }
1749 };
1750
1751 warnings.push(LintWarning {
1752 rule_name: Some(self.name().to_string()),
1753 message: match config.reflow_mode {
1754 ReflowMode::Normalize => format!(
1755 "Paragraph could be normalized to use line length of {} characters",
1756 config.line_length.get()
1757 ),
1758 ReflowMode::SentencePerLine => {
1759 let num_sentences = split_into_sentences(¶graph_text).len();
1760 if paragraph_lines.len() == 1 {
1761 // Single line with multiple sentences
1762 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1763 } else {
1764 let num_lines = paragraph_lines.len();
1765 // Multiple lines - could be split sentences or mixed
1766 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1767 }
1768 },
1769 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1770 },
1771 line: warning_line,
1772 column: 1,
1773 end_line: warning_end_line,
1774 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1775 severity: Severity::Warning,
1776 fix: Some(crate::rule::Fix {
1777 range: byte_range,
1778 replacement,
1779 }),
1780 });
1781 }
1782 }
1783 }
1784
1785 warnings
1786 }
1787
1788 /// Calculate string length based on the configured length mode
1789 fn calculate_string_length(&self, s: &str) -> usize {
1790 match self.config.length_mode {
1791 LengthMode::Chars => s.chars().count(),
1792 LengthMode::Visual => s.width(),
1793 LengthMode::Bytes => s.len(),
1794 }
1795 }
1796
1797 /// Calculate effective line length excluding unbreakable URLs
1798 fn calculate_effective_length(&self, line: &str) -> usize {
1799 if self.config.strict {
1800 // In strict mode, count everything
1801 return self.calculate_string_length(line);
1802 }
1803
1804 // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1805 let bytes = line.as_bytes();
1806 if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1807 return self.calculate_string_length(line);
1808 }
1809
1810 // More precise check for URLs and links
1811 if !line.contains("http") && !line.contains('[') {
1812 return self.calculate_string_length(line);
1813 }
1814
1815 let mut effective_line = line.to_string();
1816
1817 // First handle markdown links to avoid double-counting URLs
1818 // Pattern: [text](very-long-url) -> [text](url)
1819 if line.contains('[') && line.contains("](") {
1820 for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1821 if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1822 && url.as_str().len() > 15
1823 {
1824 let replacement = format!("[{}](url)", text.as_str());
1825 effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1826 }
1827 }
1828 }
1829
1830 // Then replace bare URLs with a placeholder of reasonable length
1831 // This allows lines with long URLs to pass if the rest of the content is reasonable
1832 if effective_line.contains("http") {
1833 for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1834 let url = url_match.as_str();
1835 // Skip if this URL is already part of a markdown link we handled
1836 if !effective_line.contains(&format!("({url})")) {
1837 // Replace URL with placeholder that represents a "reasonable" URL length
1838 // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1839 let placeholder = "x".repeat(15.min(url.len()));
1840 effective_line = effective_line.replacen(url, &placeholder, 1);
1841 }
1842 }
1843 }
1844
1845 self.calculate_string_length(&effective_line)
1846 }
1847}