rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
7use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
8use crate::utils::range_utils::LineIndex;
9use crate::utils::range_utils::calculate_excess_range;
10use crate::utils::regex_cache::{
11 IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
12};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::split_into_sentences;
15use toml;
16
17mod helpers;
18pub mod md013_config;
19use helpers::{
20 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
21 split_into_segments, trim_preserving_hard_break,
22};
23pub use md013_config::MD013Config;
24use md013_config::{LengthMode, ReflowMode};
25
26#[cfg(test)]
27mod tests;
28use unicode_width::UnicodeWidthStr;
29
/// Rule MD013: reports lines whose length exceeds the configured limit.
///
/// All behavior is driven by the wrapped [`MD013Config`].
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Rule configuration (line limit, per-element toggles, strict and reflow settings).
    pub(crate) config: MD013Config,
}
34
35impl MD013LineLength {
36 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
37 Self {
38 config: MD013Config {
39 line_length: crate::types::LineLength::new(line_length),
40 code_blocks,
41 tables,
42 headings,
43 paragraphs: true, // Default to true for backwards compatibility
44 strict,
45 reflow: false,
46 reflow_mode: ReflowMode::default(),
47 length_mode: LengthMode::default(),
48 abbreviations: Vec::new(),
49 },
50 }
51 }
52
53 pub fn from_config_struct(config: MD013Config) -> Self {
54 Self { config }
55 }
56
57 fn should_ignore_line(
58 &self,
59 line: &str,
60 _lines: &[&str],
61 current_line: usize,
62 ctx: &crate::lint_context::LintContext,
63 ) -> bool {
64 if self.config.strict {
65 return false;
66 }
67
68 // Quick check for common patterns before expensive regex
69 let trimmed = line.trim();
70
71 // Only skip if the entire line is a URL (quick check first)
72 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
73 return true;
74 }
75
76 // Only skip if the entire line is an image reference (quick check first)
77 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
78 return true;
79 }
80
81 // Only skip if the entire line is a link reference (quick check first)
82 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
83 return true;
84 }
85
86 // Code blocks with long strings (only check if in code block)
87 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
88 && !trimmed.is_empty()
89 && !line.contains(' ')
90 && !line.contains('\t')
91 {
92 return true;
93 }
94
95 false
96 }
97
98 /// Check if rule should skip based on provided config (used for inline config support)
99 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
100 // Skip if content is empty
101 if ctx.content.is_empty() {
102 return true;
103 }
104
105 // For sentence-per-line or normalize mode, never skip based on line length
106 if config.reflow
107 && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
108 {
109 return false;
110 }
111
112 // Quick check: if total content is shorter than line limit, definitely skip
113 if ctx.content.len() <= config.line_length.get() {
114 return true;
115 }
116
117 // Skip if no line exceeds the limit
118 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
119 }
120}
121
122impl Rule for MD013LineLength {
123 fn name(&self) -> &'static str {
124 "MD013"
125 }
126
127 fn description(&self) -> &'static str {
128 "Line length should not be excessive"
129 }
130
131 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
132 let content = ctx.content;
133
134 // Parse inline configuration FIRST so we can use effective config for should_skip
135 let inline_config = crate::inline_config::InlineConfig::from_content(content);
136 let config_override = inline_config.get_rule_config("MD013");
137
138 // Apply configuration override if present
139 let effective_config = if let Some(json_config) = config_override {
140 if let Some(obj) = json_config.as_object() {
141 let mut config = self.config.clone();
142 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
143 config.line_length = crate::types::LineLength::new(line_length as usize);
144 }
145 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
146 config.code_blocks = code_blocks;
147 }
148 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
149 config.tables = tables;
150 }
151 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
152 config.headings = headings;
153 }
154 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
155 config.strict = strict;
156 }
157 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
158 config.reflow = reflow;
159 }
160 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
161 config.reflow_mode = match reflow_mode {
162 "default" => ReflowMode::Default,
163 "normalize" => ReflowMode::Normalize,
164 "sentence-per-line" => ReflowMode::SentencePerLine,
165 _ => ReflowMode::default(),
166 };
167 }
168 config
169 } else {
170 self.config.clone()
171 }
172 } else {
173 self.config.clone()
174 };
175
176 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
177 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
178 if self.should_skip_with_config(ctx, &effective_config)
179 && !(effective_config.reflow
180 && (effective_config.reflow_mode == ReflowMode::Normalize
181 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
182 {
183 return Ok(Vec::new());
184 }
185
186 // Direct implementation without DocumentStructure
187 let mut warnings = Vec::new();
188
189 // Special handling: line_length = 0 means "no line length limit"
190 // Skip all line length checks, but still allow reflow if enabled
191 let skip_length_checks = effective_config.line_length.is_unlimited();
192
193 // Pre-filter lines that could be problematic to avoid processing all lines
194 let mut candidate_lines = Vec::new();
195 if !skip_length_checks {
196 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
197 // Skip front matter - it should never be linted
198 if line_info.in_front_matter {
199 continue;
200 }
201
202 // Quick length check first
203 if line_info.byte_len > effective_config.line_length.get() {
204 candidate_lines.push(line_idx);
205 }
206 }
207 }
208
209 // If no candidate lines and not in normalize or sentence-per-line mode, early return
210 if candidate_lines.is_empty()
211 && !(effective_config.reflow
212 && (effective_config.reflow_mode == ReflowMode::Normalize
213 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
214 {
215 return Ok(warnings);
216 }
217
218 // Use ctx.lines if available for better performance
219 let lines: Vec<&str> = if !ctx.lines.is_empty() {
220 ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
221 } else {
222 content.lines().collect()
223 };
224
225 // Create a quick lookup set for heading lines
226 // We need this for both the heading skip check AND the paragraphs check
227 let heading_lines_set: std::collections::HashSet<usize> = ctx
228 .lines
229 .iter()
230 .enumerate()
231 .filter(|(_, line)| line.heading.is_some())
232 .map(|(idx, _)| idx + 1)
233 .collect();
234
235 // Use pre-computed table blocks from context
236 // We need this for both the table skip check AND the paragraphs check
237 let table_blocks = &ctx.table_blocks;
238 let mut table_lines_set = std::collections::HashSet::new();
239 for table in table_blocks {
240 table_lines_set.insert(table.header_line + 1);
241 table_lines_set.insert(table.delimiter_line + 1);
242 for &line in &table.content_lines {
243 table_lines_set.insert(line + 1);
244 }
245 }
246
247 // Process candidate lines for line length checks
248 for &line_idx in &candidate_lines {
249 let line_number = line_idx + 1;
250 let line = lines[line_idx];
251
252 // Calculate effective length excluding unbreakable URLs
253 let effective_length = self.calculate_effective_length(line);
254
255 // Use single line length limit for all content
256 let line_limit = effective_config.line_length.get();
257
258 // Skip short lines immediately (double-check after effective length calculation)
259 if effective_length <= line_limit {
260 continue;
261 }
262
263 // Skip mkdocstrings blocks (already handled by LintContext)
264 if ctx.lines[line_idx].in_mkdocstrings {
265 continue;
266 }
267
268 // Skip various block types efficiently
269 if !effective_config.strict {
270 // Skip setext heading underlines
271 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
272 continue;
273 }
274
275 // Skip block elements according to config flags
276 // The flags mean: true = check these elements, false = skip these elements
277 // So we skip when the flag is FALSE and the line is in that element type
278 if (!effective_config.headings && heading_lines_set.contains(&line_number))
279 || (!effective_config.code_blocks
280 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
281 || (!effective_config.tables && table_lines_set.contains(&line_number))
282 || ctx.lines[line_number - 1].blockquote.is_some()
283 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
284 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
285 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
286 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
287 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
288 {
289 continue;
290 }
291
292 // Check if this is a paragraph/regular text line
293 // If paragraphs = false, skip lines that are NOT in special blocks
294 if !effective_config.paragraphs {
295 let is_special_block = heading_lines_set.contains(&line_number)
296 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
297 || table_lines_set.contains(&line_number)
298 || ctx.lines[line_number - 1].blockquote.is_some()
299 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
300 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
301 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
302 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
303 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
304 || ctx
305 .line_info(line_number)
306 .is_some_and(|info| info.in_mkdocs_container());
307
308 // Skip regular paragraph text when paragraphs = false
309 if !is_special_block {
310 continue;
311 }
312 }
313
314 // Skip lines that are only a URL, image ref, or link ref
315 if self.should_ignore_line(line, &lines, line_idx, ctx) {
316 continue;
317 }
318 }
319
320 // In sentence-per-line mode, check if this is a single long sentence
321 // If so, emit a warning without a fix (user must manually rephrase)
322 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
323 let sentences = split_into_sentences(line.trim());
324 if sentences.len() == 1 {
325 // Single sentence that's too long - warn but don't auto-fix
326 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
327
328 let (start_line, start_col, end_line, end_col) =
329 calculate_excess_range(line_number, line, line_limit);
330
331 warnings.push(LintWarning {
332 rule_name: Some(self.name().to_string()),
333 message,
334 line: start_line,
335 column: start_col,
336 end_line,
337 end_column: end_col,
338 severity: Severity::Warning,
339 fix: None, // No auto-fix for long single sentences
340 });
341 continue;
342 }
343 // Multiple sentences will be handled by paragraph-based reflow
344 continue;
345 }
346
347 // Don't provide fix for individual lines when reflow is enabled
348 // Paragraph-based fixes will be handled separately
349 let fix = None;
350
351 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
352
353 // Calculate precise character range for the excess portion
354 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
355
356 warnings.push(LintWarning {
357 rule_name: Some(self.name().to_string()),
358 message,
359 line: start_line,
360 column: start_col,
361 end_line,
362 end_column: end_col,
363 severity: Severity::Warning,
364 fix,
365 });
366 }
367
368 // If reflow is enabled, generate paragraph-based fixes
369 if effective_config.reflow {
370 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
371 // Merge paragraph warnings with line warnings, removing duplicates
372 for pw in paragraph_warnings {
373 // Remove any line warnings that overlap with this paragraph
374 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
375 warnings.push(pw);
376 }
377 }
378
379 Ok(warnings)
380 }
381
382 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
383 // For CLI usage, apply fixes from warnings
384 // LSP will use the warning-based fixes directly
385 let warnings = self.check(ctx)?;
386
387 // If there are no fixes, return content unchanged
388 if !warnings.iter().any(|w| w.fix.is_some()) {
389 return Ok(ctx.content.to_string());
390 }
391
392 // Apply warning-based fixes
393 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
394 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
395 }
396
397 fn as_any(&self) -> &dyn std::any::Any {
398 self
399 }
400
401 fn category(&self) -> RuleCategory {
402 RuleCategory::Whitespace
403 }
404
405 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
406 self.should_skip_with_config(ctx, &self.config)
407 }
408
409 fn default_config_section(&self) -> Option<(String, toml::Value)> {
410 let default_config = MD013Config::default();
411 let json_value = serde_json::to_value(&default_config).ok()?;
412 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
413
414 if let toml::Value::Table(table) = toml_value {
415 if !table.is_empty() {
416 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
417 } else {
418 None
419 }
420 } else {
421 None
422 }
423 }
424
425 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
426 let mut aliases = std::collections::HashMap::new();
427 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
428 Some(aliases)
429 }
430
431 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
432 where
433 Self: Sized,
434 {
435 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
436 // Use global line_length if rule-specific config still has default value
437 if rule_config.line_length.get() == 80 {
438 rule_config.line_length = config.global.line_length;
439 }
440 Box::new(Self::from_config_struct(rule_config))
441 }
442}
443
444impl MD013LineLength {
445 /// Generate paragraph-based fixes
446 fn generate_paragraph_fixes(
447 &self,
448 ctx: &crate::lint_context::LintContext,
449 config: &MD013Config,
450 lines: &[&str],
451 ) -> Vec<LintWarning> {
452 let mut warnings = Vec::new();
453 let line_index = LineIndex::new(ctx.content);
454
455 let mut i = 0;
456 while i < lines.len() {
457 let line_num = i + 1;
458
459 // Skip special structures (but NOT MkDocs containers - those get special handling)
460 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
461 info.in_code_block
462 || info.in_front_matter
463 || info.in_html_block
464 || info.in_html_comment
465 || info.in_esm_block
466 || info.in_jsx_expression
467 || info.in_mdx_comment
468 });
469
470 if should_skip_due_to_line_info
471 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
472 || lines[i].trim().starts_with('#')
473 || TableUtils::is_potential_table_row(lines[i])
474 || lines[i].trim().is_empty()
475 || is_horizontal_rule(lines[i].trim())
476 || is_template_directive_only(lines[i])
477 {
478 i += 1;
479 continue;
480 }
481
482 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
483 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
484 let container_start = i;
485
486 // Detect the actual indent level from the first content line
487 // (supports nested admonitions with 8+ spaces)
488 let first_line = lines[i];
489 let base_indent_len = first_line.len() - first_line.trim_start().len();
490 let base_indent: String = " ".repeat(base_indent_len);
491
492 // Collect consecutive MkDocs container paragraph lines
493 let mut container_lines: Vec<&str> = Vec::new();
494 while i < lines.len() {
495 let current_line_num = i + 1;
496 let line_info = ctx.line_info(current_line_num);
497
498 // Stop if we leave the MkDocs container
499 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
500 break;
501 }
502
503 let line = lines[i];
504
505 // Stop at paragraph boundaries within the container
506 if line.trim().is_empty() {
507 break;
508 }
509
510 // Skip list items, code blocks, headings within containers
511 if is_list_item(line.trim())
512 || line.trim().starts_with("```")
513 || line.trim().starts_with("~~~")
514 || line.trim().starts_with('#')
515 {
516 break;
517 }
518
519 container_lines.push(line);
520 i += 1;
521 }
522
523 if container_lines.is_empty() {
524 // Must advance i to avoid infinite loop when we encounter
525 // non-paragraph content (code block, list, heading, empty line)
526 // at the start of an MkDocs container
527 i += 1;
528 continue;
529 }
530
531 // Strip the base indent from each line and join for reflow
532 let stripped_lines: Vec<&str> = container_lines
533 .iter()
534 .map(|line| {
535 if line.starts_with(&base_indent) {
536 &line[base_indent_len..]
537 } else {
538 line.trim_start()
539 }
540 })
541 .collect();
542 let paragraph_text = stripped_lines.join(" ");
543
544 // Check if reflow is needed
545 let needs_reflow = match config.reflow_mode {
546 ReflowMode::Normalize => container_lines.len() > 1,
547 ReflowMode::SentencePerLine => {
548 let sentences = split_into_sentences(¶graph_text);
549 sentences.len() > 1 || container_lines.len() > 1
550 }
551 ReflowMode::Default => container_lines
552 .iter()
553 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
554 };
555
556 if !needs_reflow {
557 continue;
558 }
559
560 // Calculate byte range for this container paragraph
561 let start_range = line_index.whole_line_range(container_start + 1);
562 let end_line = container_start + container_lines.len() - 1;
563 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
564 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
565 } else {
566 line_index.whole_line_range(end_line + 1)
567 };
568 let byte_range = start_range.start..end_range.end;
569
570 // Reflow with adjusted line length (accounting for the 4-space indent)
571 let reflow_line_length = if config.line_length.is_unlimited() {
572 usize::MAX
573 } else {
574 config.line_length.get().saturating_sub(base_indent_len).max(1)
575 };
576 let reflow_options = crate::utils::text_reflow::ReflowOptions {
577 line_length: reflow_line_length,
578 break_on_sentences: true,
579 preserve_breaks: false,
580 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
581 abbreviations: config.abbreviations_for_reflow(),
582 };
583 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
584
585 // Re-add the 4-space indent to each reflowed line
586 let reflowed_with_indent: Vec<String> =
587 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
588 let reflowed_text = reflowed_with_indent.join("\n");
589
590 // Preserve trailing newline
591 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
592 format!("{reflowed_text}\n")
593 } else {
594 reflowed_text
595 };
596
597 // Only generate a warning if the replacement is different
598 let original_text = &ctx.content[byte_range.clone()];
599 if original_text != replacement {
600 warnings.push(LintWarning {
601 rule_name: Some(self.name().to_string()),
602 message: format!(
603 "Line length {} exceeds {} characters (in MkDocs container)",
604 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
605 config.line_length.get()
606 ),
607 line: container_start + 1,
608 column: 1,
609 end_line: end_line + 1,
610 end_column: lines[end_line].len() + 1,
611 severity: Severity::Warning,
612 fix: Some(crate::rule::Fix {
613 range: byte_range,
614 replacement,
615 }),
616 });
617 }
618 continue;
619 }
620
621 // Helper function to detect semantic line markers
622 let is_semantic_line = |content: &str| -> bool {
623 let trimmed = content.trim_start();
624 let semantic_markers = [
625 "NOTE:",
626 "WARNING:",
627 "IMPORTANT:",
628 "CAUTION:",
629 "TIP:",
630 "DANGER:",
631 "HINT:",
632 "INFO:",
633 ];
634 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
635 };
636
637 // Helper function to detect fence markers (opening or closing)
638 let is_fence_marker = |content: &str| -> bool {
639 let trimmed = content.trim_start();
640 trimmed.starts_with("```") || trimmed.starts_with("~~~")
641 };
642
643 // Check if this is a list item - handle it specially
644 let trimmed = lines[i].trim();
645 if is_list_item(trimmed) {
646 // Collect the entire list item including continuation lines
647 let list_start = i;
648 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
649 let marker_len = marker.len();
650
651 // Track lines and their types (content, code block, fence, nested list)
/// Classification of one collected line of the list item currently being processed.
#[derive(Clone)]
enum LineType {
    /// Paragraph text: the content after the list marker, or a continuation line with
    /// its indentation removed.
    Content(String),
    /// A line kept as code: inside a code block, a fence marker, or indented four or more
    /// columns past the marker. Holds the content and the original indent.
    CodeBlock(String, usize),
    /// A nested list item kept with the parent item. Holds the line content after its
    /// indentation and the original indent.
    NestedListItem(String, usize),
    /// Lines starting with NOTE:, WARNING:, etc that should stay separate
    SemanticLine(String),
    /// MkDocs Snippets delimiters (-8<-) that must stay on their own line
    SnippetLine(String),
    /// A blank line that is followed by a sufficiently indented, non-blank line and so
    /// separates blocks inside the list item.
    Empty,
}
661
662 let mut actual_indent: Option<usize> = None;
663 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
664 i += 1;
665
666 // Collect continuation lines using ctx.lines for metadata
667 while i < lines.len() {
668 let line_info = &ctx.lines[i];
669
670 // Use pre-computed is_blank from ctx
671 if line_info.is_blank {
672 // Empty line - check if next line is indented (part of list item)
673 if i + 1 < lines.len() {
674 let next_info = &ctx.lines[i + 1];
675
676 // Check if next line is indented enough to be continuation
677 if !next_info.is_blank && next_info.indent >= marker_len {
678 // This blank line is between paragraphs/blocks in the list item
679 list_item_lines.push(LineType::Empty);
680 i += 1;
681 continue;
682 }
683 }
684 // No indented line after blank, end of list item
685 break;
686 }
687
688 // Use pre-computed indent from ctx
689 let indent = line_info.indent;
690
691 // Valid continuation must be indented at least marker_len
692 if indent >= marker_len {
693 let trimmed = line_info.content(ctx.content).trim();
694
695 // Use pre-computed in_code_block from ctx
696 if line_info.in_code_block {
697 list_item_lines.push(LineType::CodeBlock(
698 line_info.content(ctx.content)[indent..].to_string(),
699 indent,
700 ));
701 i += 1;
702 continue;
703 }
704
705 // Check if this is a SIBLING list item (breaks parent)
706 // Nested lists are indented >= marker_len and are PART of the parent item
707 // Siblings are at indent < marker_len (at or before parent marker)
708 if is_list_item(trimmed) && indent < marker_len {
709 // This is a sibling item at same or higher level - end parent item
710 break;
711 }
712
713 // Check if this is a NESTED list item marker
714 // Nested lists should be processed separately UNLESS they're part of a
715 // multi-paragraph list item (indicated by a blank line before them OR
716 // it's a continuation of an already-started nested list)
717 if is_list_item(trimmed) && indent >= marker_len {
718 // Check if there was a blank line before this (multi-paragraph context)
719 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
720
721 // Check if we've already seen nested list content (another nested item)
722 let has_nested_content = list_item_lines.iter().any(|line| {
723 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
724 || matches!(line, LineType::NestedListItem(_, _))
725 });
726
727 if !has_blank_before && !has_nested_content {
728 // Single-paragraph context with no prior nested items: starts a new item
729 // End parent collection; nested list will be processed next
730 break;
731 }
732 // else: multi-paragraph context or continuation of nested list, keep collecting
733 // Mark this as a nested list item to preserve its structure
734 list_item_lines.push(LineType::NestedListItem(
735 line_info.content(ctx.content)[indent..].to_string(),
736 indent,
737 ));
738 i += 1;
739 continue;
740 }
741
742 // Normal continuation: marker_len to marker_len+3
743 if indent <= marker_len + 3 {
744 // Set actual_indent from first non-code continuation if not set
745 if actual_indent.is_none() {
746 actual_indent = Some(indent);
747 }
748
749 // Extract content (remove indentation and trailing whitespace)
750 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
751 // See: https://github.com/rvben/rumdl/issues/76
752 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
753
754 // Check if this is a fence marker (opening or closing)
755 // These should be treated as code block lines, not paragraph content
756 if is_fence_marker(&content) {
757 list_item_lines.push(LineType::CodeBlock(content, indent));
758 }
759 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
760 else if is_semantic_line(&content) {
761 list_item_lines.push(LineType::SemanticLine(content));
762 }
763 // Check if this is a snippet block delimiter (-8<- or --8<--)
764 // These must be preserved on their own lines for MkDocs Snippets extension
765 else if is_snippet_block_delimiter(&content) {
766 list_item_lines.push(LineType::SnippetLine(content));
767 } else {
768 list_item_lines.push(LineType::Content(content));
769 }
770 i += 1;
771 } else {
772 // indent >= marker_len + 4: indented code block
773 list_item_lines.push(LineType::CodeBlock(
774 line_info.content(ctx.content)[indent..].to_string(),
775 indent,
776 ));
777 i += 1;
778 }
779 } else {
780 // Not indented enough, end of list item
781 break;
782 }
783 }
784
785 // Use detected indent or fallback to marker length
786 let indent_size = actual_indent.unwrap_or(marker_len);
787 let expected_indent = " ".repeat(indent_size);
788
789 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
/// A group of consecutive list-item lines of the same kind, built from the `LineType`
/// sequence collected for the item.
#[derive(Clone)]
enum Block {
    /// Consecutive paragraph content lines.
    Paragraph(Vec<String>),
    /// A run of code lines.
    Code {
        /// (content, indent) pairs
        lines: Vec<(String, usize)>,
        /// Whether there was a blank line before this block
        has_preceding_blank: bool,
    },
    /// (content, indent) pairs for nested list items
    NestedList(Vec<(String, usize)>),
    /// Semantic markers like NOTE:, WARNING: that stay on their own line
    SemanticLine(String),
    /// MkDocs Snippets delimiter that stays on its own line without extra spacing
    SnippetLine(String),
    /// A block of raw HTML lines.
    Html {
        /// HTML content preserved exactly as-is
        lines: Vec<String>,
        /// Whether there was a blank line before this block
        has_preceding_blank: bool,
    },
}
805
// HTML tag detection helpers
// Block-level HTML tags that should trigger HTML block detection (lower-case names)
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];

/// Returns the lower-cased tag name when `line` starts a block-level HTML element.
///
/// A line starting with `<!--` yields the pseudo tag name `"!--"`. Closing tags (`</...`),
/// other `<!...` declarations, and tags not listed in `BLOCK_LEVEL_TAGS` yield `None`.
///
/// The tag name ends at the first whitespace, `>` or `/`, or at the end of the line, so
/// a tag whose attributes continue on the following lines (`<div` alone on a line) is
/// recognised as well.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let trimmed = line.trim();

    // HTML comments are tracked under a pseudo tag name
    if trimmed.starts_with("<!--") {
        return Some("!--".to_string());
    }

    // Closing tags and declarations never open a block
    if trimmed.starts_with("</") || trimmed.starts_with("<!") {
        return None;
    }

    let after_bracket = trimmed.strip_prefix('<')?;
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let tag_name = after_bracket[..name_end].to_lowercase();

    // Only known block-level tags start an HTML block
    BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()).then_some(tag_name)
}
875
/// Returns `true` when `line` starts with the closing tag for `tag_name`.
///
/// For the comment pseudo tag `"!--"` the line only has to end with `-->`.
/// Otherwise the trimmed line must start with `</`, optionally followed by whitespace,
/// then the tag name compared ASCII case-insensitively. The name must be followed by
/// `>`, whitespace or the end of the line, so `</thead>` does not close `th` and
/// `</pre>` does not close `p`.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let Some(rest) = trimmed.strip_prefix("</") else {
        return false;
    };
    let rest = rest.trim_start();

    // `get` returns None when the line is too short or the cut is not a char boundary
    let Some(candidate) = rest.get(..tag_name.len()) else {
        return false;
    };
    if !candidate.eq_ignore_ascii_case(tag_name) {
        return false;
    }

    // Require a tag-name boundary so a longer name sharing the prefix is rejected
    match rest[tag_name.len()..].chars().next() {
        None => true,
        Some(c) => c == '>' || c.is_whitespace(),
    }
}
889
/// Returns `true` when the tag on `line` needs no separate closing tag.
///
/// That is the case when the trimmed line ends with `/>`, or when the line starts with
/// an HTML void element such as `<hr>` or `<br>`, which never has a closing tag even
/// when written without the trailing slash. The tag name is compared ASCII
/// case-insensitively.
fn is_self_closing_tag(line: &str) -> bool {
    // Void elements as defined by the HTML standard
    const VOID_ELEMENTS: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source", "track", "wbr",
    ];

    let trimmed = line.trim();
    if trimmed.ends_with("/>") {
        return true;
    }

    let Some(after_bracket) = trimmed.strip_prefix('<') else {
        return false;
    };
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let name = &after_bracket[..name_end];

    VOID_ELEMENTS.iter().any(|void| void.eq_ignore_ascii_case(name))
}
894
895 let mut blocks: Vec<Block> = Vec::new();
896 let mut current_paragraph: Vec<String> = Vec::new();
897 let mut current_code_block: Vec<(String, usize)> = Vec::new();
898 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
899 let mut current_html_block: Vec<String> = Vec::new();
900 let mut html_tag_stack: Vec<String> = Vec::new();
901 let mut in_code = false;
902 let mut in_nested_list = false;
903 let mut in_html_block = false;
904 let mut had_preceding_blank = false; // Track if we just saw an empty line
905 let mut code_block_has_preceding_blank = false; // Track blank before current code block
906 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
907
908 for line in &list_item_lines {
909 match line {
910 LineType::Empty => {
911 if in_code {
912 current_code_block.push((String::new(), 0));
913 } else if in_nested_list {
914 current_nested_list.push((String::new(), 0));
915 } else if in_html_block {
916 // Allow blank lines inside HTML blocks
917 current_html_block.push(String::new());
918 } else if !current_paragraph.is_empty() {
919 blocks.push(Block::Paragraph(current_paragraph.clone()));
920 current_paragraph.clear();
921 }
922 // Mark that we saw a blank line
923 had_preceding_blank = true;
924 }
925 LineType::Content(content) => {
926 // Check if we're currently in an HTML block
927 if in_html_block {
928 current_html_block.push(content.clone());
929
930 // Check if this line closes any open HTML tags
931 if let Some(last_tag) = html_tag_stack.last() {
932 if is_html_closing_tag(content, last_tag) {
933 html_tag_stack.pop();
934
935 // If stack is empty, HTML block is complete
936 if html_tag_stack.is_empty() {
937 blocks.push(Block::Html {
938 lines: current_html_block.clone(),
939 has_preceding_blank: html_block_has_preceding_blank,
940 });
941 current_html_block.clear();
942 in_html_block = false;
943 }
944 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
945 // Nested opening tag within HTML block
946 if !is_self_closing_tag(content) {
947 html_tag_stack.push(new_tag);
948 }
949 }
950 }
951 had_preceding_blank = false;
952 } else {
953 // Not in HTML block - check if this line starts one
954 if let Some(tag_name) = is_block_html_opening_tag(content) {
955 // Flush current paragraph before starting HTML block
956 if in_code {
957 blocks.push(Block::Code {
958 lines: current_code_block.clone(),
959 has_preceding_blank: code_block_has_preceding_blank,
960 });
961 current_code_block.clear();
962 in_code = false;
963 } else if in_nested_list {
964 blocks.push(Block::NestedList(current_nested_list.clone()));
965 current_nested_list.clear();
966 in_nested_list = false;
967 } else if !current_paragraph.is_empty() {
968 blocks.push(Block::Paragraph(current_paragraph.clone()));
969 current_paragraph.clear();
970 }
971
972 // Start new HTML block
973 in_html_block = true;
974 html_block_has_preceding_blank = had_preceding_blank;
975 current_html_block.push(content.clone());
976
977 // Check if it's self-closing or needs a closing tag
978 if is_self_closing_tag(content) {
979 // Self-closing tag - complete the HTML block immediately
980 blocks.push(Block::Html {
981 lines: current_html_block.clone(),
982 has_preceding_blank: html_block_has_preceding_blank,
983 });
984 current_html_block.clear();
985 in_html_block = false;
986 } else {
987 // Regular opening tag - push to stack
988 html_tag_stack.push(tag_name);
989 }
990 } else {
991 // Regular content line - add to paragraph
992 if in_code {
993 // Switching from code to content
994 blocks.push(Block::Code {
995 lines: current_code_block.clone(),
996 has_preceding_blank: code_block_has_preceding_blank,
997 });
998 current_code_block.clear();
999 in_code = false;
1000 } else if in_nested_list {
1001 // Switching from nested list to content
1002 blocks.push(Block::NestedList(current_nested_list.clone()));
1003 current_nested_list.clear();
1004 in_nested_list = false;
1005 }
1006 current_paragraph.push(content.clone());
1007 }
1008 had_preceding_blank = false; // Reset after content
1009 }
1010 }
1011 LineType::CodeBlock(content, indent) => {
1012 if in_nested_list {
1013 // Switching from nested list to code
1014 blocks.push(Block::NestedList(current_nested_list.clone()));
1015 current_nested_list.clear();
1016 in_nested_list = false;
1017 } else if in_html_block {
1018 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1019 blocks.push(Block::Html {
1020 lines: current_html_block.clone(),
1021 has_preceding_blank: html_block_has_preceding_blank,
1022 });
1023 current_html_block.clear();
1024 html_tag_stack.clear();
1025 in_html_block = false;
1026 }
1027 if !in_code {
1028 // Switching from content to code
1029 if !current_paragraph.is_empty() {
1030 blocks.push(Block::Paragraph(current_paragraph.clone()));
1031 current_paragraph.clear();
1032 }
1033 in_code = true;
1034 // Record whether there was a blank line before this code block
1035 code_block_has_preceding_blank = had_preceding_blank;
1036 }
1037 current_code_block.push((content.clone(), *indent));
1038 had_preceding_blank = false; // Reset after code
1039 }
1040 LineType::NestedListItem(content, indent) => {
1041 if in_code {
1042 // Switching from code to nested list
1043 blocks.push(Block::Code {
1044 lines: current_code_block.clone(),
1045 has_preceding_blank: code_block_has_preceding_blank,
1046 });
1047 current_code_block.clear();
1048 in_code = false;
1049 } else if in_html_block {
1050 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1051 blocks.push(Block::Html {
1052 lines: current_html_block.clone(),
1053 has_preceding_blank: html_block_has_preceding_blank,
1054 });
1055 current_html_block.clear();
1056 html_tag_stack.clear();
1057 in_html_block = false;
1058 }
1059 if !in_nested_list {
1060 // Switching from content to nested list
1061 if !current_paragraph.is_empty() {
1062 blocks.push(Block::Paragraph(current_paragraph.clone()));
1063 current_paragraph.clear();
1064 }
1065 in_nested_list = true;
1066 }
1067 current_nested_list.push((content.clone(), *indent));
1068 had_preceding_blank = false; // Reset after nested list
1069 }
1070 LineType::SemanticLine(content) => {
1071 // Semantic lines are standalone - flush any current block and add as separate block
1072 if in_code {
1073 blocks.push(Block::Code {
1074 lines: current_code_block.clone(),
1075 has_preceding_blank: code_block_has_preceding_blank,
1076 });
1077 current_code_block.clear();
1078 in_code = false;
1079 } else if in_nested_list {
1080 blocks.push(Block::NestedList(current_nested_list.clone()));
1081 current_nested_list.clear();
1082 in_nested_list = false;
1083 } else if in_html_block {
1084 blocks.push(Block::Html {
1085 lines: current_html_block.clone(),
1086 has_preceding_blank: html_block_has_preceding_blank,
1087 });
1088 current_html_block.clear();
1089 html_tag_stack.clear();
1090 in_html_block = false;
1091 } else if !current_paragraph.is_empty() {
1092 blocks.push(Block::Paragraph(current_paragraph.clone()));
1093 current_paragraph.clear();
1094 }
1095 // Add semantic line as its own block
1096 blocks.push(Block::SemanticLine(content.clone()));
1097 had_preceding_blank = false; // Reset after semantic line
1098 }
1099 LineType::SnippetLine(content) => {
1100 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1101 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1102 if in_code {
1103 blocks.push(Block::Code {
1104 lines: current_code_block.clone(),
1105 has_preceding_blank: code_block_has_preceding_blank,
1106 });
1107 current_code_block.clear();
1108 in_code = false;
1109 } else if in_nested_list {
1110 blocks.push(Block::NestedList(current_nested_list.clone()));
1111 current_nested_list.clear();
1112 in_nested_list = false;
1113 } else if in_html_block {
1114 blocks.push(Block::Html {
1115 lines: current_html_block.clone(),
1116 has_preceding_blank: html_block_has_preceding_blank,
1117 });
1118 current_html_block.clear();
1119 html_tag_stack.clear();
1120 in_html_block = false;
1121 } else if !current_paragraph.is_empty() {
1122 blocks.push(Block::Paragraph(current_paragraph.clone()));
1123 current_paragraph.clear();
1124 }
1125 // Add snippet line as its own block
1126 blocks.push(Block::SnippetLine(content.clone()));
1127 had_preceding_blank = false;
1128 }
1129 }
1130 }
1131
1132 // Push remaining block
1133 if in_code && !current_code_block.is_empty() {
1134 blocks.push(Block::Code {
1135 lines: current_code_block,
1136 has_preceding_blank: code_block_has_preceding_blank,
1137 });
1138 } else if in_nested_list && !current_nested_list.is_empty() {
1139 blocks.push(Block::NestedList(current_nested_list));
1140 } else if in_html_block && !current_html_block.is_empty() {
1141 // If we still have an unclosed HTML block, push it anyway
1142 // (malformed HTML - missing closing tag)
1143 blocks.push(Block::Html {
1144 lines: current_html_block,
1145 has_preceding_blank: html_block_has_preceding_blank,
1146 });
1147 } else if !current_paragraph.is_empty() {
1148 blocks.push(Block::Paragraph(current_paragraph));
1149 }
1150
1151 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1152 let content_lines: Vec<String> = list_item_lines
1153 .iter()
1154 .filter_map(|line| {
1155 if let LineType::Content(s) = line {
1156 Some(s.clone())
1157 } else {
1158 None
1159 }
1160 })
1161 .collect();
1162
1163 // Check if we need to reflow this list item
1164 // We check the combined content to see if it exceeds length limits
1165 let combined_content = content_lines.join(" ").trim().to_string();
1166 let full_line = format!("{marker}{combined_content}");
1167
1168 // Helper to check if we should reflow in normalize mode
1169 let should_normalize = || {
1170 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1171 // DO normalize if it has plain text content that spans multiple lines
1172 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1173 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1174 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1175 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1176 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1177
1178 // If we have nested lists, code blocks, semantic lines, or snippet lines but no paragraphs, don't normalize
1179 if (has_nested_lists || has_code_blocks || has_semantic_lines || has_snippet_lines)
1180 && !has_paragraphs
1181 {
1182 return false;
1183 }
1184
1185 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1186 if has_paragraphs {
1187 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1188 if paragraph_count > 1 {
1189 // Multiple paragraph blocks should be normalized
1190 return true;
1191 }
1192
1193 // Single paragraph block: normalize if it has multiple content lines
1194 if content_lines.len() > 1 {
1195 return true;
1196 }
1197 }
1198
1199 false
1200 };
1201
1202 let needs_reflow = match config.reflow_mode {
1203 ReflowMode::Normalize => {
1204 // Only reflow if:
1205 // 1. The combined line would exceed the limit, OR
1206 // 2. The list item should be normalized (has multi-line plain text)
1207 let combined_length = self.calculate_effective_length(&full_line);
1208 if combined_length > config.line_length.get() {
1209 true
1210 } else {
1211 should_normalize()
1212 }
1213 }
1214 ReflowMode::SentencePerLine => {
1215 // Check if list item has multiple sentences
1216 let sentences = split_into_sentences(&combined_content);
1217 sentences.len() > 1
1218 }
1219 ReflowMode::Default => {
1220 // In default mode, only reflow if any individual line exceeds limit
1221 // Check the original lines, not the combined content
1222 (list_start..i)
1223 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1224 }
1225 };
1226
1227 if needs_reflow {
1228 let start_range = line_index.whole_line_range(list_start + 1);
1229 let end_line = i - 1;
1230 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1231 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1232 } else {
1233 line_index.whole_line_range(end_line + 1)
1234 };
1235 let byte_range = start_range.start..end_range.end;
1236
1237 // Reflow each block (paragraphs only, preserve code blocks)
1238 // When line_length = 0 (no limit), use a very large value for reflow
1239 let reflow_line_length = if config.line_length.is_unlimited() {
1240 usize::MAX
1241 } else {
1242 config.line_length.get().saturating_sub(indent_size).max(1)
1243 };
1244 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1245 line_length: reflow_line_length,
1246 break_on_sentences: true,
1247 preserve_breaks: false,
1248 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1249 abbreviations: config.abbreviations_for_reflow(),
1250 };
1251
1252 let mut result: Vec<String> = Vec::new();
1253 let mut is_first_block = true;
1254
1255 for (block_idx, block) in blocks.iter().enumerate() {
1256 match block {
1257 Block::Paragraph(para_lines) => {
1258 // Split the paragraph into segments at hard break boundaries
1259 // Each segment can be reflowed independently
1260 let segments = split_into_segments(para_lines);
1261
1262 for (segment_idx, segment) in segments.iter().enumerate() {
1263 // Check if this segment ends with a hard break and what type
1264 let hard_break_type = segment.last().and_then(|line| {
1265 let line = line.strip_suffix('\r').unwrap_or(line);
1266 if line.ends_with('\\') {
1267 Some("\\")
1268 } else if line.ends_with(" ") {
1269 Some(" ")
1270 } else {
1271 None
1272 }
1273 });
1274
1275 // Join and reflow the segment (removing the hard break marker for processing)
1276 let segment_for_reflow: Vec<String> = segment
1277 .iter()
1278 .map(|line| {
1279 // Strip hard break marker (2 spaces or backslash) for reflow processing
1280 if line.ends_with('\\') {
1281 line[..line.len() - 1].trim_end().to_string()
1282 } else if line.ends_with(" ") {
1283 line[..line.len() - 2].trim_end().to_string()
1284 } else {
1285 line.clone()
1286 }
1287 })
1288 .collect();
1289
1290 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1291 if !segment_text.is_empty() {
1292 let reflowed =
1293 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1294
1295 if is_first_block && segment_idx == 0 {
1296 // First segment of first block starts with marker
1297 result.push(format!("{marker}{}", reflowed[0]));
1298 for line in reflowed.iter().skip(1) {
1299 result.push(format!("{expected_indent}{line}"));
1300 }
1301 is_first_block = false;
1302 } else {
1303 // Subsequent segments
1304 for line in reflowed {
1305 result.push(format!("{expected_indent}{line}"));
1306 }
1307 }
1308
1309 // If this segment had a hard break, add it back to the last line
1310 // Preserve the original hard break format (backslash or two spaces)
1311 if let Some(break_marker) = hard_break_type
1312 && let Some(last_line) = result.last_mut()
1313 {
1314 last_line.push_str(break_marker);
1315 }
1316 }
1317 }
1318
1319 // Add blank line after paragraph block if there's a next block
1320 // BUT: check if next block is a code block that doesn't want a preceding blank
1321 // Also don't add blank lines before snippet lines (they should stay tight)
1322 if block_idx < blocks.len() - 1 {
1323 let next_block = &blocks[block_idx + 1];
1324 let should_add_blank = match next_block {
1325 Block::Code {
1326 has_preceding_blank, ..
1327 } => *has_preceding_blank,
1328 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1329 _ => true, // For all other blocks, add blank line
1330 };
1331 if should_add_blank {
1332 result.push(String::new());
1333 }
1334 }
1335 }
1336 Block::Code {
1337 lines: code_lines,
1338 has_preceding_blank: _,
1339 } => {
1340 // Preserve code blocks as-is with original indentation
1341 // NOTE: Blank line before code block is handled by the previous block
1342 // (see paragraph block's logic above)
1343
1344 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1345 if is_first_block && idx == 0 {
1346 // First line of first block gets marker
1347 result.push(format!(
1348 "{marker}{}",
1349 " ".repeat(orig_indent - marker_len) + content
1350 ));
1351 is_first_block = false;
1352 } else if content.is_empty() {
1353 result.push(String::new());
1354 } else {
1355 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1356 }
1357 }
1358 }
1359 Block::NestedList(nested_items) => {
1360 // Preserve nested list items as-is with original indentation
1361 if !is_first_block {
1362 result.push(String::new());
1363 }
1364
1365 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1366 if is_first_block && idx == 0 {
1367 // First line of first block gets marker
1368 result.push(format!(
1369 "{marker}{}",
1370 " ".repeat(orig_indent - marker_len) + content
1371 ));
1372 is_first_block = false;
1373 } else if content.is_empty() {
1374 result.push(String::new());
1375 } else {
1376 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1377 }
1378 }
1379
1380 // Add blank line after nested list if there's a next block
1381 // Check if next block is a code block that doesn't want a preceding blank
1382 if block_idx < blocks.len() - 1 {
1383 let next_block = &blocks[block_idx + 1];
1384 let should_add_blank = match next_block {
1385 Block::Code {
1386 has_preceding_blank, ..
1387 } => *has_preceding_blank,
1388 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1389 _ => true, // For all other blocks, add blank line
1390 };
1391 if should_add_blank {
1392 result.push(String::new());
1393 }
1394 }
1395 }
1396 Block::SemanticLine(content) => {
1397 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1398 // Add blank line before if not first block
1399 if !is_first_block {
1400 result.push(String::new());
1401 }
1402
1403 if is_first_block {
1404 // First block starts with marker
1405 result.push(format!("{marker}{content}"));
1406 is_first_block = false;
1407 } else {
1408 // Subsequent blocks use expected indent
1409 result.push(format!("{expected_indent}{content}"));
1410 }
1411
1412 // Add blank line after semantic line if there's a next block
1413 // Check if next block is a code block that doesn't want a preceding blank
1414 if block_idx < blocks.len() - 1 {
1415 let next_block = &blocks[block_idx + 1];
1416 let should_add_blank = match next_block {
1417 Block::Code {
1418 has_preceding_blank, ..
1419 } => *has_preceding_blank,
1420 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1421 _ => true, // For all other blocks, add blank line
1422 };
1423 if should_add_blank {
1424 result.push(String::new());
1425 }
1426 }
1427 }
1428 Block::SnippetLine(content) => {
1429 // Preserve snippet delimiters (-8<-) as-is on their own line
1430 // Unlike semantic lines, snippet lines don't add extra blank lines
1431 if is_first_block {
1432 // First block starts with marker
1433 result.push(format!("{marker}{content}"));
1434 is_first_block = false;
1435 } else {
1436 // Subsequent blocks use expected indent
1437 result.push(format!("{expected_indent}{content}"));
1438 }
1439 // No blank lines added before or after snippet delimiters
1440 }
1441 Block::Html {
1442 lines: html_lines,
1443 has_preceding_blank: _,
1444 } => {
1445 // Preserve HTML blocks exactly as-is with original indentation
1446 // NOTE: Blank line before HTML block is handled by the previous block
1447
1448 for (idx, line) in html_lines.iter().enumerate() {
1449 if is_first_block && idx == 0 {
1450 // First line of first block gets marker
1451 result.push(format!("{marker}{line}"));
1452 is_first_block = false;
1453 } else if line.is_empty() {
1454 // Preserve blank lines inside HTML blocks
1455 result.push(String::new());
1456 } else {
1457 // Preserve lines with their original content (already includes indentation)
1458 result.push(format!("{expected_indent}{line}"));
1459 }
1460 }
1461
1462 // Add blank line after HTML block if there's a next block
1463 if block_idx < blocks.len() - 1 {
1464 let next_block = &blocks[block_idx + 1];
1465 let should_add_blank = match next_block {
1466 Block::Code {
1467 has_preceding_blank, ..
1468 } => *has_preceding_blank,
1469 Block::Html {
1470 has_preceding_blank, ..
1471 } => *has_preceding_blank,
1472 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1473 _ => true, // For all other blocks, add blank line
1474 };
1475 if should_add_blank {
1476 result.push(String::new());
1477 }
1478 }
1479 }
1480 }
1481 }
1482
1483 let reflowed_text = result.join("\n");
1484
1485 // Preserve trailing newline
1486 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1487 format!("{reflowed_text}\n")
1488 } else {
1489 reflowed_text
1490 };
1491
1492 // Get the original text to compare
1493 let original_text = &ctx.content[byte_range.clone()];
1494
1495 // Only generate a warning if the replacement is different from the original
1496 if original_text != replacement {
1497 // Generate an appropriate message based on why reflow is needed
1498 let message = match config.reflow_mode {
1499 ReflowMode::SentencePerLine => {
1500 let num_sentences = split_into_sentences(&combined_content).len();
1501 let num_lines = content_lines.len();
1502 if num_lines == 1 {
1503 // Single line with multiple sentences
1504 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1505 } else {
1506 // Multiple lines - could be split sentences or mixed
1507 format!(
1508 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1509 )
1510 }
1511 }
1512 ReflowMode::Normalize => {
1513 let combined_length = self.calculate_effective_length(&full_line);
1514 if combined_length > config.line_length.get() {
1515 format!(
1516 "Line length {} exceeds {} characters",
1517 combined_length,
1518 config.line_length.get()
1519 )
1520 } else {
1521 "Multi-line content can be normalized".to_string()
1522 }
1523 }
1524 ReflowMode::Default => {
1525 let combined_length = self.calculate_effective_length(&full_line);
1526 format!(
1527 "Line length {} exceeds {} characters",
1528 combined_length,
1529 config.line_length.get()
1530 )
1531 }
1532 };
1533
1534 warnings.push(LintWarning {
1535 rule_name: Some(self.name().to_string()),
1536 message,
1537 line: list_start + 1,
1538 column: 1,
1539 end_line: end_line + 1,
1540 end_column: lines[end_line].len() + 1,
1541 severity: Severity::Warning,
1542 fix: Some(crate::rule::Fix {
1543 range: byte_range,
1544 replacement,
1545 }),
1546 });
1547 }
1548 }
1549 continue;
1550 }
1551
1552 // Found start of a paragraph - collect all lines in it
1553 let paragraph_start = i;
1554 let mut paragraph_lines = vec![lines[i]];
1555 i += 1;
1556
1557 while i < lines.len() {
1558 let next_line = lines[i];
1559 let next_line_num = i + 1;
1560 let next_trimmed = next_line.trim();
1561
1562 // Stop at paragraph boundaries
1563 if next_trimmed.is_empty()
1564 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1565 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1566 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1567 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1568 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1569 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1570 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1571 || ctx
1572 .line_info(next_line_num)
1573 .is_some_and(|info| info.in_mkdocs_container())
1574 || (next_line_num > 0
1575 && next_line_num <= ctx.lines.len()
1576 && ctx.lines[next_line_num - 1].blockquote.is_some())
1577 || next_trimmed.starts_with('#')
1578 || TableUtils::is_potential_table_row(next_line)
1579 || is_list_item(next_trimmed)
1580 || is_horizontal_rule(next_trimmed)
1581 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1582 || is_template_directive_only(next_line)
1583 || is_standalone_attr_list(next_line)
1584 || is_snippet_block_delimiter(next_line)
1585 {
1586 break;
1587 }
1588
1589 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1590 if i > 0 && has_hard_break(lines[i - 1]) {
1591 // Don't include lines after hard breaks in the same paragraph
1592 break;
1593 }
1594
1595 paragraph_lines.push(next_line);
1596 i += 1;
1597 }
1598
1599 // Combine paragraph lines into a single string for processing
1600 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1601 let paragraph_text = paragraph_lines.join(" ");
1602
1603 // Skip reflowing if this paragraph contains definition list items
1604 // Definition lists are multi-line structures that should not be joined
1605 let contains_definition_list = paragraph_lines
1606 .iter()
1607 .any(|line| crate::utils::is_definition_list_item(line));
1608
1609 if contains_definition_list {
1610 // Don't reflow definition lists - skip this paragraph
1611 i = paragraph_start + paragraph_lines.len();
1612 continue;
1613 }
1614
1615 // Skip reflowing if this paragraph contains MkDocs Snippets markers
1616 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1617 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1618
1619 if contains_snippets {
1620 // Don't reflow Snippets blocks - skip this paragraph
1621 i = paragraph_start + paragraph_lines.len();
1622 continue;
1623 }
1624
1625 // Check if this paragraph needs reflowing
1626 let needs_reflow = match config.reflow_mode {
1627 ReflowMode::Normalize => {
1628 // In normalize mode, reflow multi-line paragraphs
1629 paragraph_lines.len() > 1
1630 }
1631 ReflowMode::SentencePerLine => {
1632 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1633 // Note: we check the joined text because sentences can span multiple lines
1634 let sentences = split_into_sentences(¶graph_text);
1635
1636 // Always reflow if multiple sentences on one line
1637 if sentences.len() > 1 {
1638 true
1639 } else if paragraph_lines.len() > 1 {
1640 // For single-sentence paragraphs spanning multiple lines:
1641 // Reflow if they COULD fit on one line (respecting line-length constraint)
1642 if config.line_length.is_unlimited() {
1643 // No line-length constraint - always join single sentences
1644 true
1645 } else {
1646 // Only join if it fits within line-length
1647 let effective_length = self.calculate_effective_length(¶graph_text);
1648 effective_length <= config.line_length.get()
1649 }
1650 } else {
1651 false
1652 }
1653 }
1654 ReflowMode::Default => {
1655 // In default mode, only reflow if lines exceed limit
1656 paragraph_lines
1657 .iter()
1658 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1659 }
1660 };
1661
1662 if needs_reflow {
1663 // Calculate byte range for this paragraph
1664 // Use whole_line_range for each line and combine
1665 let start_range = line_index.whole_line_range(paragraph_start + 1);
1666 let end_line = paragraph_start + paragraph_lines.len() - 1;
1667
1668 // For the last line, we want to preserve any trailing newline
1669 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1670 // Last line without trailing newline - use line_text_range
1671 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1672 } else {
1673 // Not the last line or has trailing newline - use whole_line_range
1674 line_index.whole_line_range(end_line + 1)
1675 };
1676
1677 let byte_range = start_range.start..end_range.end;
1678
1679 // Check if the paragraph ends with a hard break and what type
1680 let hard_break_type = paragraph_lines.last().and_then(|line| {
1681 let line = line.strip_suffix('\r').unwrap_or(line);
1682 if line.ends_with('\\') {
1683 Some("\\")
1684 } else if line.ends_with(" ") {
1685 Some(" ")
1686 } else {
1687 None
1688 }
1689 });
1690
1691 // Reflow the paragraph
1692 // When line_length = 0 (no limit), use a very large value for reflow
1693 let reflow_line_length = if config.line_length.is_unlimited() {
1694 usize::MAX
1695 } else {
1696 config.line_length.get()
1697 };
1698 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1699 line_length: reflow_line_length,
1700 break_on_sentences: true,
1701 preserve_breaks: false,
1702 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1703 abbreviations: config.abbreviations_for_reflow(),
1704 };
1705 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1706
1707 // If the original paragraph ended with a hard break, preserve it
1708 // Preserve the original hard break format (backslash or two spaces)
1709 if let Some(break_marker) = hard_break_type
1710 && !reflowed.is_empty()
1711 {
1712 let last_idx = reflowed.len() - 1;
1713 if !has_hard_break(&reflowed[last_idx]) {
1714 reflowed[last_idx].push_str(break_marker);
1715 }
1716 }
1717
1718 let reflowed_text = reflowed.join("\n");
1719
1720 // Preserve trailing newline if the original paragraph had one
1721 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1722 format!("{reflowed_text}\n")
1723 } else {
1724 reflowed_text
1725 };
1726
1727 // Get the original text to compare
1728 let original_text = &ctx.content[byte_range.clone()];
1729
1730 // Only generate a warning if the replacement is different from the original
1731 if original_text != replacement {
1732 // Create warning with actual fix
1733 // In default mode, report the specific line that violates
1734 // In normalize mode, report the whole paragraph
1735 // In sentence-per-line mode, report the entire paragraph
1736 let (warning_line, warning_end_line) = match config.reflow_mode {
1737 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1738 ReflowMode::SentencePerLine => {
1739 // Highlight the entire paragraph that needs reformatting
1740 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1741 }
1742 ReflowMode::Default => {
1743 // Find the first line that exceeds the limit
1744 let mut violating_line = paragraph_start;
1745 for (idx, line) in paragraph_lines.iter().enumerate() {
1746 if self.calculate_effective_length(line) > config.line_length.get() {
1747 violating_line = paragraph_start + idx;
1748 break;
1749 }
1750 }
1751 (violating_line + 1, violating_line + 1)
1752 }
1753 };
1754
1755 warnings.push(LintWarning {
1756 rule_name: Some(self.name().to_string()),
1757 message: match config.reflow_mode {
1758 ReflowMode::Normalize => format!(
1759 "Paragraph could be normalized to use line length of {} characters",
1760 config.line_length.get()
1761 ),
1762 ReflowMode::SentencePerLine => {
1763 let num_sentences = split_into_sentences(¶graph_text).len();
1764 if paragraph_lines.len() == 1 {
1765 // Single line with multiple sentences
1766 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1767 } else {
1768 let num_lines = paragraph_lines.len();
1769 // Multiple lines - could be split sentences or mixed
1770 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1771 }
1772 },
1773 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1774 },
1775 line: warning_line,
1776 column: 1,
1777 end_line: warning_end_line,
1778 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1779 severity: Severity::Warning,
1780 fix: Some(crate::rule::Fix {
1781 range: byte_range,
1782 replacement,
1783 }),
1784 });
1785 }
1786 }
1787 }
1788
1789 warnings
1790 }
1791
1792 /// Calculate string length based on the configured length mode
1793 fn calculate_string_length(&self, s: &str) -> usize {
1794 match self.config.length_mode {
1795 LengthMode::Chars => s.chars().count(),
1796 LengthMode::Visual => s.width(),
1797 LengthMode::Bytes => s.len(),
1798 }
1799 }
1800
1801 /// Calculate effective line length excluding unbreakable URLs
1802 fn calculate_effective_length(&self, line: &str) -> usize {
1803 if self.config.strict {
1804 // In strict mode, count everything
1805 return self.calculate_string_length(line);
1806 }
1807
1808 // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1809 let bytes = line.as_bytes();
1810 if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1811 return self.calculate_string_length(line);
1812 }
1813
1814 // More precise check for URLs and links
1815 if !line.contains("http") && !line.contains('[') {
1816 return self.calculate_string_length(line);
1817 }
1818
1819 let mut effective_line = line.to_string();
1820
1821 // First handle markdown links to avoid double-counting URLs
1822 // Pattern: [text](very-long-url) -> [text](url)
1823 if line.contains('[') && line.contains("](") {
1824 for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1825 if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1826 && url.as_str().len() > 15
1827 {
1828 let replacement = format!("[{}](url)", text.as_str());
1829 effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1830 }
1831 }
1832 }
1833
1834 // Then replace bare URLs with a placeholder of reasonable length
1835 // This allows lines with long URLs to pass if the rest of the content is reasonable
1836 if effective_line.contains("http") {
1837 for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1838 let url = url_match.as_str();
1839 // Skip if this URL is already part of a markdown link we handled
1840 if !effective_line.contains(&format!("({url})")) {
1841 // Replace URL with placeholder that represents a "reasonable" URL length
1842 // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1843 let placeholder = "x".repeat(15.min(url.len()));
1844 effective_line = effective_line.replacen(url, &placeholder, 1);
1845 }
1846 }
1847 }
1848
1849 self.calculate_string_length(&effective_line)
1850 }
1851}