rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::split_into_sentences;
15use pulldown_cmark::LinkType;
16use toml;
17
18mod helpers;
19pub mod md013_config;
20use helpers::{
21 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
22 split_into_segments, trim_preserving_hard_break,
23};
24pub use md013_config::MD013Config;
25use md013_config::{LengthMode, ReflowMode};
26
27#[cfg(test)]
28mod tests;
29use unicode_width::UnicodeWidthStr;
30
31#[derive(Clone, Default)]
32pub struct MD013LineLength {
33 pub(crate) config: MD013Config,
34}
35
36impl MD013LineLength {
37 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
38 Self {
39 config: MD013Config {
40 line_length: crate::types::LineLength::new(line_length),
41 code_blocks,
42 tables,
43 headings,
44 paragraphs: true, // Default to true for backwards compatibility
45 strict,
46 reflow: false,
47 reflow_mode: ReflowMode::default(),
48 length_mode: LengthMode::default(),
49 abbreviations: Vec::new(),
50 },
51 }
52 }
53
54 pub fn from_config_struct(config: MD013Config) -> Self {
55 Self { config }
56 }
57
58 fn should_ignore_line(
59 &self,
60 line: &str,
61 _lines: &[&str],
62 current_line: usize,
63 ctx: &crate::lint_context::LintContext,
64 ) -> bool {
65 if self.config.strict {
66 return false;
67 }
68
69 // Quick check for common patterns before expensive regex
70 let trimmed = line.trim();
71
72 // Only skip if the entire line is a URL (quick check first)
73 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
74 return true;
75 }
76
77 // Only skip if the entire line is an image reference (quick check first)
78 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
79 return true;
80 }
81
82 // Note: link reference definitions are handled as always-exempt (even in strict mode)
83 // in the main check loop, so they don't need to be checked here.
84
85 // Code blocks with long strings (only check if in code block)
86 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
87 && !trimmed.is_empty()
88 && !line.contains(' ')
89 && !line.contains('\t')
90 {
91 return true;
92 }
93
94 false
95 }
96
97 /// Check if rule should skip based on provided config (used for inline config support)
98 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
99 // Skip if content is empty
100 if ctx.content.is_empty() {
101 return true;
102 }
103
104 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
105 if config.reflow
106 && (config.reflow_mode == ReflowMode::SentencePerLine
107 || config.reflow_mode == ReflowMode::SemanticLineBreaks
108 || config.reflow_mode == ReflowMode::Normalize)
109 {
110 return false;
111 }
112
113 // Quick check: if total content is shorter than line limit, definitely skip
114 if ctx.content.len() <= config.line_length.get() {
115 return true;
116 }
117
118 // Skip if no line exceeds the limit
119 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
120 }
121}
122
123impl Rule for MD013LineLength {
124 fn name(&self) -> &'static str {
125 "MD013"
126 }
127
128 fn description(&self) -> &'static str {
129 "Line length should not be excessive"
130 }
131
132 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
133 // Use pre-parsed inline config from LintContext
134 let config_override = ctx.inline_config().get_rule_config("MD013");
135
136 // Apply configuration override if present
137 let effective_config = if let Some(json_config) = config_override {
138 if let Some(obj) = json_config.as_object() {
139 let mut config = self.config.clone();
140 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
141 config.line_length = crate::types::LineLength::new(line_length as usize);
142 }
143 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
144 config.code_blocks = code_blocks;
145 }
146 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
147 config.tables = tables;
148 }
149 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
150 config.headings = headings;
151 }
152 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
153 config.strict = strict;
154 }
155 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
156 config.reflow = reflow;
157 }
158 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
159 config.reflow_mode = match reflow_mode {
160 "default" => ReflowMode::Default,
161 "normalize" => ReflowMode::Normalize,
162 "sentence-per-line" => ReflowMode::SentencePerLine,
163 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
164 _ => ReflowMode::default(),
165 };
166 }
167 config
168 } else {
169 self.config.clone()
170 }
171 } else {
172 self.config.clone()
173 };
174
175 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
176 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
177 if self.should_skip_with_config(ctx, &effective_config)
178 && !(effective_config.reflow
179 && (effective_config.reflow_mode == ReflowMode::Normalize
180 || effective_config.reflow_mode == ReflowMode::SentencePerLine
181 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
182 {
183 return Ok(Vec::new());
184 }
185
186 // Direct implementation without DocumentStructure
187 let mut warnings = Vec::new();
188
189 // Special handling: line_length = 0 means "no line length limit"
190 // Skip all line length checks, but still allow reflow if enabled
191 let skip_length_checks = effective_config.line_length.is_unlimited();
192
193 // Pre-filter lines that could be problematic to avoid processing all lines
194 let mut candidate_lines = Vec::new();
195 if !skip_length_checks {
196 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
197 // Skip front matter - it should never be linted
198 if line_info.in_front_matter {
199 continue;
200 }
201
202 // Quick length check first
203 if line_info.byte_len > effective_config.line_length.get() {
204 candidate_lines.push(line_idx);
205 }
206 }
207 }
208
209 // If no candidate lines and not in normalize or sentence-per-line mode, early return
210 if candidate_lines.is_empty()
211 && !(effective_config.reflow
212 && (effective_config.reflow_mode == ReflowMode::Normalize
213 || effective_config.reflow_mode == ReflowMode::SentencePerLine
214 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
215 {
216 return Ok(warnings);
217 }
218
219 let lines = ctx.raw_lines();
220
221 // Create a quick lookup set for heading lines
222 // We need this for both the heading skip check AND the paragraphs check
223 let heading_lines_set: std::collections::HashSet<usize> = ctx
224 .lines
225 .iter()
226 .enumerate()
227 .filter(|(_, line)| line.heading.is_some())
228 .map(|(idx, _)| idx + 1)
229 .collect();
230
231 // Use pre-computed table blocks from context
232 // We need this for both the table skip check AND the paragraphs check
233 let table_blocks = &ctx.table_blocks;
234 let mut table_lines_set = std::collections::HashSet::new();
235 for table in table_blocks {
236 table_lines_set.insert(table.header_line + 1);
237 table_lines_set.insert(table.delimiter_line + 1);
238 for &line in &table.content_lines {
239 table_lines_set.insert(line + 1);
240 }
241 }
242
243 // Process candidate lines for line length checks
244 for &line_idx in &candidate_lines {
245 let line_number = line_idx + 1;
246 let line = lines[line_idx];
247
248 // Calculate actual line length (used in warning messages)
249 let effective_length = self.calculate_effective_length(line);
250
251 // Use single line length limit for all content
252 let line_limit = effective_config.line_length.get();
253
254 // In non-strict mode, forgive the trailing non-whitespace run.
255 // If the line only exceeds the limit because of a long token at the end
256 // (URL, link chain, identifier), it passes. This matches markdownlint's
257 // behavior: line.replace(/\S*$/u, "#")
258 let check_length = if effective_config.strict {
259 effective_length
260 } else {
261 match line.rfind(char::is_whitespace) {
262 Some(pos) => {
263 let ws_char = line[pos..].chars().next().unwrap();
264 let prefix_end = pos + ws_char.len_utf8();
265 self.calculate_string_length(&line[..prefix_end]) + 1
266 }
267 None => 1, // No whitespace — entire line is a single token
268 }
269 };
270
271 // Skip lines where the check length is within the limit
272 if check_length <= line_limit {
273 continue;
274 }
275
276 // Semantic link understanding: suppress when excess comes entirely from inline URLs
277 if !effective_config.strict {
278 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
279 if text_only_length <= line_limit {
280 continue;
281 }
282 }
283
284 // Skip mkdocstrings blocks (already handled by LintContext)
285 if ctx.lines[line_idx].in_mkdocstrings {
286 continue;
287 }
288
289 // Link reference definitions are always exempt, even in strict mode.
290 // There's no way to shorten them without breaking the URL.
291 {
292 let trimmed = line.trim();
293 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
294 continue;
295 }
296 }
297
298 // Skip various block types efficiently
299 if !effective_config.strict {
300 // Skip setext heading underlines
301 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
302 continue;
303 }
304
305 // Skip block elements according to config flags
306 // The flags mean: true = check these elements, false = skip these elements
307 // So we skip when the flag is FALSE and the line is in that element type
308 if (!effective_config.headings && heading_lines_set.contains(&line_number))
309 || (!effective_config.code_blocks
310 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
311 || (!effective_config.tables && table_lines_set.contains(&line_number))
312 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
313 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
314 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
315 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
316 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
317 {
318 continue;
319 }
320
321 // Check if this is a paragraph/regular text line
322 // If paragraphs = false, skip lines that are NOT in special blocks
323 if !effective_config.paragraphs {
324 let is_special_block = heading_lines_set.contains(&line_number)
325 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
326 || table_lines_set.contains(&line_number)
327 || ctx.lines[line_number - 1].blockquote.is_some()
328 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
329 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
330 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
331 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
332 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
333 || ctx
334 .line_info(line_number)
335 .is_some_and(|info| info.in_mkdocs_container());
336
337 // Skip regular paragraph text when paragraphs = false
338 if !is_special_block {
339 continue;
340 }
341 }
342
343 // Skip lines that are only a URL, image ref, or link ref
344 if self.should_ignore_line(line, lines, line_idx, ctx) {
345 continue;
346 }
347 }
348
349 // In sentence-per-line mode, check if this is a single long sentence
350 // If so, emit a warning without a fix (user must manually rephrase)
351 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
352 let sentences = split_into_sentences(line.trim());
353 if sentences.len() == 1 {
354 // Single sentence that's too long - warn but don't auto-fix
355 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
356
357 let (start_line, start_col, end_line, end_col) =
358 calculate_excess_range(line_number, line, line_limit);
359
360 warnings.push(LintWarning {
361 rule_name: Some(self.name().to_string()),
362 message,
363 line: start_line,
364 column: start_col,
365 end_line,
366 end_column: end_col,
367 severity: Severity::Warning,
368 fix: None, // No auto-fix for long single sentences
369 });
370 continue;
371 }
372 // Multiple sentences will be handled by paragraph-based reflow
373 continue;
374 }
375
376 // In semantic-line-breaks mode, skip per-line checks —
377 // all reflow is handled at the paragraph level with cascading splits
378 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
379 continue;
380 }
381
382 // Don't provide fix for individual lines when reflow is enabled
383 // Paragraph-based fixes will be handled separately
384 let fix = None;
385
386 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
387
388 // Calculate precise character range for the excess portion
389 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
390
391 warnings.push(LintWarning {
392 rule_name: Some(self.name().to_string()),
393 message,
394 line: start_line,
395 column: start_col,
396 end_line,
397 end_column: end_col,
398 severity: Severity::Warning,
399 fix,
400 });
401 }
402
403 // If reflow is enabled, generate paragraph-based fixes
404 if effective_config.reflow {
405 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
406 // Merge paragraph warnings with line warnings, removing duplicates
407 for pw in paragraph_warnings {
408 // Remove any line warnings that overlap with this paragraph
409 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
410 warnings.push(pw);
411 }
412 }
413
414 Ok(warnings)
415 }
416
417 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
418 // For CLI usage, apply fixes from warnings
419 // LSP will use the warning-based fixes directly
420 let warnings = self.check(ctx)?;
421
422 // If there are no fixes, return content unchanged
423 if !warnings.iter().any(|w| w.fix.is_some()) {
424 return Ok(ctx.content.to_string());
425 }
426
427 // Apply warning-based fixes
428 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
429 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
430 }
431
432 fn as_any(&self) -> &dyn std::any::Any {
433 self
434 }
435
436 fn category(&self) -> RuleCategory {
437 RuleCategory::Whitespace
438 }
439
440 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
441 self.should_skip_with_config(ctx, &self.config)
442 }
443
444 fn default_config_section(&self) -> Option<(String, toml::Value)> {
445 let default_config = MD013Config::default();
446 let json_value = serde_json::to_value(&default_config).ok()?;
447 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
448
449 if let toml::Value::Table(table) = toml_value {
450 if !table.is_empty() {
451 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
452 } else {
453 None
454 }
455 } else {
456 None
457 }
458 }
459
460 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
461 let mut aliases = std::collections::HashMap::new();
462 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
463 Some(aliases)
464 }
465
466 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
467 where
468 Self: Sized,
469 {
470 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
471 // Use global line_length if rule-specific config still has default value
472 if rule_config.line_length.get() == 80 {
473 rule_config.line_length = config.global.line_length;
474 }
475 Box::new(Self::from_config_struct(rule_config))
476 }
477}
478
479impl MD013LineLength {
480 /// Generate paragraph-based fixes
481 fn generate_paragraph_fixes(
482 &self,
483 ctx: &crate::lint_context::LintContext,
484 config: &MD013Config,
485 lines: &[&str],
486 ) -> Vec<LintWarning> {
487 let mut warnings = Vec::new();
488 let line_index = LineIndex::new(ctx.content);
489
490 let mut i = 0;
491 while i < lines.len() {
492 let line_num = i + 1;
493
494 // Skip special structures (but NOT MkDocs containers - those get special handling)
495 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
496 info.in_code_block
497 || info.in_front_matter
498 || info.in_html_block
499 || info.in_html_comment
500 || info.in_esm_block
501 || info.in_jsx_expression
502 || info.in_mdx_comment
503 || info.in_mkdocstrings
504 });
505
506 if should_skip_due_to_line_info
507 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
508 || lines[i].trim().starts_with('#')
509 || TableUtils::is_potential_table_row(lines[i])
510 || lines[i].trim().is_empty()
511 || is_horizontal_rule(lines[i].trim())
512 || is_template_directive_only(lines[i])
513 || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
514 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
515 {
516 i += 1;
517 continue;
518 }
519
520 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
521 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
522 // Skip admonition/tab marker lines — only reflow their indented content
523 let current_line = lines[i];
524 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
525 i += 1;
526 continue;
527 }
528
529 let container_start = i;
530
531 // Detect the actual indent level from the first content line
532 // (supports nested admonitions with 8+ spaces)
533 let first_line = lines[i];
534 let base_indent_len = first_line.len() - first_line.trim_start().len();
535 let base_indent: String = " ".repeat(base_indent_len);
536
537 // Collect consecutive MkDocs container paragraph lines
538 let mut container_lines: Vec<&str> = Vec::new();
539 while i < lines.len() {
540 let current_line_num = i + 1;
541 let line_info = ctx.line_info(current_line_num);
542
543 // Stop if we leave the MkDocs container
544 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
545 break;
546 }
547
548 let line = lines[i];
549
550 // Stop at paragraph boundaries within the container
551 if line.trim().is_empty() {
552 break;
553 }
554
555 // Skip list items, code blocks, headings within containers
556 if is_list_item(line.trim())
557 || line.trim().starts_with("```")
558 || line.trim().starts_with("~~~")
559 || line.trim().starts_with('#')
560 {
561 break;
562 }
563
564 container_lines.push(line);
565 i += 1;
566 }
567
568 if container_lines.is_empty() {
569 // Must advance i to avoid infinite loop when we encounter
570 // non-paragraph content (code block, list, heading, empty line)
571 // at the start of an MkDocs container
572 i += 1;
573 continue;
574 }
575
576 // Strip the base indent from each line and join for reflow
577 let stripped_lines: Vec<&str> = container_lines
578 .iter()
579 .map(|line| {
580 if line.starts_with(&base_indent) {
581 &line[base_indent_len..]
582 } else {
583 line.trim_start()
584 }
585 })
586 .collect();
587 let paragraph_text = stripped_lines.join(" ");
588
589 // Check if reflow is needed
590 let needs_reflow = match config.reflow_mode {
591 ReflowMode::Normalize => container_lines.len() > 1,
592 ReflowMode::SentencePerLine => {
593 let sentences = split_into_sentences(¶graph_text);
594 sentences.len() > 1 || container_lines.len() > 1
595 }
596 ReflowMode::SemanticLineBreaks => {
597 let sentences = split_into_sentences(¶graph_text);
598 sentences.len() > 1
599 || container_lines.len() > 1
600 || container_lines
601 .iter()
602 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
603 }
604 ReflowMode::Default => container_lines
605 .iter()
606 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
607 };
608
609 if !needs_reflow {
610 continue;
611 }
612
613 // Calculate byte range for this container paragraph
614 let start_range = line_index.whole_line_range(container_start + 1);
615 let end_line = container_start + container_lines.len() - 1;
616 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
617 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
618 } else {
619 line_index.whole_line_range(end_line + 1)
620 };
621 let byte_range = start_range.start..end_range.end;
622
623 // Reflow with adjusted line length (accounting for the 4-space indent)
624 let reflow_line_length = if config.line_length.is_unlimited() {
625 usize::MAX
626 } else {
627 config.line_length.get().saturating_sub(base_indent_len).max(1)
628 };
629 let reflow_options = crate::utils::text_reflow::ReflowOptions {
630 line_length: reflow_line_length,
631 break_on_sentences: true,
632 preserve_breaks: false,
633 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
634 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
635 abbreviations: config.abbreviations_for_reflow(),
636 };
637 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
638
639 // Re-add the 4-space indent to each reflowed line
640 let reflowed_with_indent: Vec<String> =
641 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
642 let reflowed_text = reflowed_with_indent.join("\n");
643
644 // Preserve trailing newline
645 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
646 format!("{reflowed_text}\n")
647 } else {
648 reflowed_text
649 };
650
651 // Only generate a warning if the replacement is different
652 let original_text = &ctx.content[byte_range.clone()];
653 if original_text != replacement {
654 warnings.push(LintWarning {
655 rule_name: Some(self.name().to_string()),
656 message: format!(
657 "Line length {} exceeds {} characters (in MkDocs container)",
658 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
659 config.line_length.get()
660 ),
661 line: container_start + 1,
662 column: 1,
663 end_line: end_line + 1,
664 end_column: lines[end_line].len() + 1,
665 severity: Severity::Warning,
666 fix: Some(crate::rule::Fix {
667 range: byte_range,
668 replacement,
669 }),
670 });
671 }
672 continue;
673 }
674
675 // Helper function to detect semantic line markers
676 let is_semantic_line = |content: &str| -> bool {
677 let trimmed = content.trim_start();
678 let semantic_markers = [
679 "NOTE:",
680 "WARNING:",
681 "IMPORTANT:",
682 "CAUTION:",
683 "TIP:",
684 "DANGER:",
685 "HINT:",
686 "INFO:",
687 ];
688 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
689 };
690
691 // Helper function to detect fence markers (opening or closing)
692 let is_fence_marker = |content: &str| -> bool {
693 let trimmed = content.trim_start();
694 trimmed.starts_with("```") || trimmed.starts_with("~~~")
695 };
696
697 // Check if this is a list item - handle it specially
698 let trimmed = lines[i].trim();
699 if is_list_item(trimmed) {
700 // Collect the entire list item including continuation lines
701 let list_start = i;
702 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
703 let marker_len = marker.len();
704
705 // Track lines and their types (content, code block, fence, nested list)
706 #[derive(Clone)]
707 enum LineType {
708 Content(String),
709 CodeBlock(String, usize), // content and original indent
710 NestedListItem(String, usize), // full line content and original indent
711 SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
712 SnippetLine(String), // MkDocs Snippets delimiters (-8<-) that must stay on their own line
713 DivMarker(String), // Quarto/Pandoc div markers (::: opening or closing)
714 Empty,
715 }
716
717 let mut actual_indent: Option<usize> = None;
718 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
719 i += 1;
720
721 // Collect continuation lines using ctx.lines for metadata
722 while i < lines.len() {
723 let line_info = &ctx.lines[i];
724
725 // Use pre-computed is_blank from ctx
726 if line_info.is_blank {
727 // Empty line - check if next line is indented (part of list item)
728 if i + 1 < lines.len() {
729 let next_info = &ctx.lines[i + 1];
730
731 // Check if next line is indented enough to be continuation
732 if !next_info.is_blank && next_info.indent >= marker_len {
733 // This blank line is between paragraphs/blocks in the list item
734 list_item_lines.push(LineType::Empty);
735 i += 1;
736 continue;
737 }
738 }
739 // No indented line after blank, end of list item
740 break;
741 }
742
743 // Use pre-computed indent from ctx
744 let indent = line_info.indent;
745
746 // Valid continuation must be indented at least marker_len
747 if indent >= marker_len {
748 let trimmed = line_info.content(ctx.content).trim();
749
750 // Use pre-computed in_code_block from ctx
751 if line_info.in_code_block {
752 list_item_lines.push(LineType::CodeBlock(
753 line_info.content(ctx.content)[indent..].to_string(),
754 indent,
755 ));
756 i += 1;
757 continue;
758 }
759
760 // Check if this is a SIBLING list item (breaks parent)
761 // Nested lists are indented >= marker_len and are PART of the parent item
762 // Siblings are at indent < marker_len (at or before parent marker)
763 if is_list_item(trimmed) && indent < marker_len {
764 // This is a sibling item at same or higher level - end parent item
765 break;
766 }
767
768 // Check if this is a NESTED list item marker
769 // Nested lists should be processed separately UNLESS they're part of a
770 // multi-paragraph list item (indicated by a blank line before them OR
771 // it's a continuation of an already-started nested list)
772 if is_list_item(trimmed) && indent >= marker_len {
773 // Check if there was a blank line before this (multi-paragraph context)
774 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
775
776 // Check if we've already seen nested list content (another nested item)
777 let has_nested_content = list_item_lines.iter().any(|line| {
778 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
779 || matches!(line, LineType::NestedListItem(_, _))
780 });
781
782 if !has_blank_before && !has_nested_content {
783 // Single-paragraph context with no prior nested items: starts a new item
784 // End parent collection; nested list will be processed next
785 break;
786 }
787 // else: multi-paragraph context or continuation of nested list, keep collecting
788 // Mark this as a nested list item to preserve its structure
789 list_item_lines.push(LineType::NestedListItem(
790 line_info.content(ctx.content)[indent..].to_string(),
791 indent,
792 ));
793 i += 1;
794 continue;
795 }
796
797 // Normal continuation: marker_len to marker_len+3
798 if indent <= marker_len + 3 {
799 // Set actual_indent from first non-code continuation if not set
800 if actual_indent.is_none() {
801 actual_indent = Some(indent);
802 }
803
804 // Extract content (remove indentation and trailing whitespace)
805 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
806 // See: https://github.com/rvben/rumdl/issues/76
807 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
808
809 // Check if this is a div marker (::: opening or closing)
810 // These must be preserved on their own line, not merged into paragraphs
811 if line_info.is_div_marker {
812 list_item_lines.push(LineType::DivMarker(content));
813 }
814 // Check if this is a fence marker (opening or closing)
815 // These should be treated as code block lines, not paragraph content
816 else if is_fence_marker(&content) {
817 list_item_lines.push(LineType::CodeBlock(content, indent));
818 }
819 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
820 else if is_semantic_line(&content) {
821 list_item_lines.push(LineType::SemanticLine(content));
822 }
823 // Check if this is a snippet block delimiter (-8<- or --8<--)
824 // These must be preserved on their own lines for MkDocs Snippets extension
825 else if is_snippet_block_delimiter(&content) {
826 list_item_lines.push(LineType::SnippetLine(content));
827 } else {
828 list_item_lines.push(LineType::Content(content));
829 }
830 i += 1;
831 } else {
832 // indent >= marker_len + 4: indented code block
833 list_item_lines.push(LineType::CodeBlock(
834 line_info.content(ctx.content)[indent..].to_string(),
835 indent,
836 ));
837 i += 1;
838 }
839 } else {
840 // Not indented enough, end of list item
841 break;
842 }
843 }
844
845 // Use detected indent or fallback to marker length
846 let indent_size = actual_indent.unwrap_or(marker_len);
847 let expected_indent = " ".repeat(indent_size);
848
849 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
850 #[derive(Clone)]
851 enum Block {
852 Paragraph(Vec<String>),
853 Code {
854 lines: Vec<(String, usize)>, // (content, indent) pairs
855 has_preceding_blank: bool, // Whether there was a blank line before this block
856 },
857 NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
858 SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
859 SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
860 DivMarker(String), // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
861 Html {
862 lines: Vec<String>, // HTML content preserved exactly as-is
863 has_preceding_blank: bool, // Whether there was a blank line before this block
864 },
865 }
866
// HTML tag detection helpers
// Block-level HTML tags that should trigger HTML block detection.
// All entries are lower-case; lookups compare ASCII case-insensitively.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];

/// Returns the name of the HTML block that `line` opens, if any.
///
/// Leading and trailing whitespace is ignored. The result is:
/// - `Some("!--")` when the line starts an HTML comment (`<!--`),
/// - `Some(tag)` (lower-case, taken from `BLOCK_LEVEL_TAGS`) when the line
///   starts with an opening tag for a known block-level element,
/// - `None` for closing tags, declarations (`<!...`), inline or unknown tags,
///   and anything that does not start with `<`.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let trimmed = line.trim();

    // Comments are tracked with the pseudo tag name "!--".
    if trimmed.starts_with("<!--") {
        return Some("!--".to_string());
    }

    // Anything not starting with '<' cannot open an HTML block.
    let after_bracket = trimmed.strip_prefix('<')?;

    // Closing tags (`</...`) and declarations (`<!...`) never open a block.
    if after_bracket.starts_with('/') || after_bracket.starts_with('!') {
        return None;
    }

    // The tag name ends at whitespace, '>' or '/'. When none of these occur the
    // name runs to the end of the line (an opening tag whose attributes continue
    // on the following line), which is still a valid block start.
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let candidate = &after_bracket[..name_end];

    // Only known block-level tags count; return the canonical lower-case name
    // without allocating a lower-cased copy of every candidate.
    BLOCK_LEVEL_TAGS
        .iter()
        .find(|tag| tag.eq_ignore_ascii_case(candidate))
        .map(|tag| (*tag).to_string())
}
936
/// Returns `true` when `line` closes the HTML block that was opened as `tag_name`.
///
/// `tag_name` is either an element name or the pseudo name `"!--"` used for
/// HTML comments. Leading and trailing whitespace on `line` is ignored.
///
/// - For `"!--"` the line must end with `-->`.
/// - Otherwise the line must start with `</`, optionally followed by
///   whitespace, then the tag name (ASCII case-insensitive), and the name must
///   be followed by `>`, whitespace, or the end of the line. The boundary check
///   keeps `</pre>` from closing `p` and `</thead>` from closing `th`.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let Some(rest) = trimmed.strip_prefix("</") else {
        return false;
    };
    // Tolerate whitespace between `</` and the tag name.
    let rest = rest.trim_start();

    // `get` yields None when the line is too short or the cut would fall inside
    // a multi-byte character, so no slicing panic is possible here.
    let Some(name) = rest.get(..tag_name.len()) else {
        return false;
    };
    if !name.eq_ignore_ascii_case(tag_name) {
        return false;
    }

    // Require a name boundary so that a longer tag sharing the prefix is rejected.
    match rest[tag_name.len()..].chars().next() {
        None => true,
        Some(next) => next == '>' || next.is_whitespace(),
    }
}
950
/// Returns `true` when the HTML construct on `line` is complete on that line
/// and therefore must not wait for a later closing line.
///
/// Leading and trailing whitespace is ignored. A line qualifies when it:
/// - ends with `/>` (explicit self-closing syntax), or
/// - is an HTML comment that opens and closes on the same line
///   (`<!-- ... -->`), or
/// - starts with a void element such as `<hr>` whose tag is terminated by `>`
///   on this line; void elements never have a closing tag, so treating them as
///   open would leave the enclosing HTML block unterminated.
fn is_self_closing_tag(line: &str) -> bool {
    // HTML void elements: these never take a closing tag.
    const VOID_ELEMENTS: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source", "track", "wbr",
    ];

    let trimmed = line.trim();

    // Explicit self-closing syntax, e.g. `<br />`.
    if trimmed.ends_with("/>") {
        return true;
    }

    let Some(after_bracket) = trimmed.strip_prefix('<') else {
        return false;
    };

    // One-line comment: the terminator must come after the opener, so `<!-->`
    // (where the two would overlap) does not count.
    if let Some(comment_body) = after_bracket.strip_prefix("!--") {
        return comment_body.ends_with("-->");
    }

    // Void element: the name ends at whitespace, '>' or '/', and the tag itself
    // must be closed with '>' on this line (a tag continued on the next line is
    // left to the regular open-tag handling).
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let name = &after_bracket[..name_end];

    after_bracket[name_end..].contains('>') && VOID_ELEMENTS.iter().any(|void| void.eq_ignore_ascii_case(name))
}
955
956 let mut blocks: Vec<Block> = Vec::new();
957 let mut current_paragraph: Vec<String> = Vec::new();
958 let mut current_code_block: Vec<(String, usize)> = Vec::new();
959 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
960 let mut current_html_block: Vec<String> = Vec::new();
961 let mut html_tag_stack: Vec<String> = Vec::new();
962 let mut in_code = false;
963 let mut in_nested_list = false;
964 let mut in_html_block = false;
965 let mut had_preceding_blank = false; // Track if we just saw an empty line
966 let mut code_block_has_preceding_blank = false; // Track blank before current code block
967 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
968
969 for line in &list_item_lines {
970 match line {
971 LineType::Empty => {
972 if in_code {
973 current_code_block.push((String::new(), 0));
974 } else if in_nested_list {
975 current_nested_list.push((String::new(), 0));
976 } else if in_html_block {
977 // Allow blank lines inside HTML blocks
978 current_html_block.push(String::new());
979 } else if !current_paragraph.is_empty() {
980 blocks.push(Block::Paragraph(current_paragraph.clone()));
981 current_paragraph.clear();
982 }
983 // Mark that we saw a blank line
984 had_preceding_blank = true;
985 }
986 LineType::Content(content) => {
987 // Check if we're currently in an HTML block
988 if in_html_block {
989 current_html_block.push(content.clone());
990
991 // Check if this line closes any open HTML tags
992 if let Some(last_tag) = html_tag_stack.last() {
993 if is_html_closing_tag(content, last_tag) {
994 html_tag_stack.pop();
995
996 // If stack is empty, HTML block is complete
997 if html_tag_stack.is_empty() {
998 blocks.push(Block::Html {
999 lines: current_html_block.clone(),
1000 has_preceding_blank: html_block_has_preceding_blank,
1001 });
1002 current_html_block.clear();
1003 in_html_block = false;
1004 }
1005 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1006 // Nested opening tag within HTML block
1007 if !is_self_closing_tag(content) {
1008 html_tag_stack.push(new_tag);
1009 }
1010 }
1011 }
1012 had_preceding_blank = false;
1013 } else {
1014 // Not in HTML block - check if this line starts one
1015 if let Some(tag_name) = is_block_html_opening_tag(content) {
1016 // Flush current paragraph before starting HTML block
1017 if in_code {
1018 blocks.push(Block::Code {
1019 lines: current_code_block.clone(),
1020 has_preceding_blank: code_block_has_preceding_blank,
1021 });
1022 current_code_block.clear();
1023 in_code = false;
1024 } else if in_nested_list {
1025 blocks.push(Block::NestedList(current_nested_list.clone()));
1026 current_nested_list.clear();
1027 in_nested_list = false;
1028 } else if !current_paragraph.is_empty() {
1029 blocks.push(Block::Paragraph(current_paragraph.clone()));
1030 current_paragraph.clear();
1031 }
1032
1033 // Start new HTML block
1034 in_html_block = true;
1035 html_block_has_preceding_blank = had_preceding_blank;
1036 current_html_block.push(content.clone());
1037
1038 // Check if it's self-closing or needs a closing tag
1039 if is_self_closing_tag(content) {
1040 // Self-closing tag - complete the HTML block immediately
1041 blocks.push(Block::Html {
1042 lines: current_html_block.clone(),
1043 has_preceding_blank: html_block_has_preceding_blank,
1044 });
1045 current_html_block.clear();
1046 in_html_block = false;
1047 } else {
1048 // Regular opening tag - push to stack
1049 html_tag_stack.push(tag_name);
1050 }
1051 } else {
1052 // Regular content line - add to paragraph
1053 if in_code {
1054 // Switching from code to content
1055 blocks.push(Block::Code {
1056 lines: current_code_block.clone(),
1057 has_preceding_blank: code_block_has_preceding_blank,
1058 });
1059 current_code_block.clear();
1060 in_code = false;
1061 } else if in_nested_list {
1062 // Switching from nested list to content
1063 blocks.push(Block::NestedList(current_nested_list.clone()));
1064 current_nested_list.clear();
1065 in_nested_list = false;
1066 }
1067 current_paragraph.push(content.clone());
1068 }
1069 had_preceding_blank = false; // Reset after content
1070 }
1071 }
1072 LineType::CodeBlock(content, indent) => {
1073 if in_nested_list {
1074 // Switching from nested list to code
1075 blocks.push(Block::NestedList(current_nested_list.clone()));
1076 current_nested_list.clear();
1077 in_nested_list = false;
1078 } else if in_html_block {
1079 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1080 blocks.push(Block::Html {
1081 lines: current_html_block.clone(),
1082 has_preceding_blank: html_block_has_preceding_blank,
1083 });
1084 current_html_block.clear();
1085 html_tag_stack.clear();
1086 in_html_block = false;
1087 }
1088 if !in_code {
1089 // Switching from content to code
1090 if !current_paragraph.is_empty() {
1091 blocks.push(Block::Paragraph(current_paragraph.clone()));
1092 current_paragraph.clear();
1093 }
1094 in_code = true;
1095 // Record whether there was a blank line before this code block
1096 code_block_has_preceding_blank = had_preceding_blank;
1097 }
1098 current_code_block.push((content.clone(), *indent));
1099 had_preceding_blank = false; // Reset after code
1100 }
1101 LineType::NestedListItem(content, indent) => {
1102 if in_code {
1103 // Switching from code to nested list
1104 blocks.push(Block::Code {
1105 lines: current_code_block.clone(),
1106 has_preceding_blank: code_block_has_preceding_blank,
1107 });
1108 current_code_block.clear();
1109 in_code = false;
1110 } else if in_html_block {
1111 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1112 blocks.push(Block::Html {
1113 lines: current_html_block.clone(),
1114 has_preceding_blank: html_block_has_preceding_blank,
1115 });
1116 current_html_block.clear();
1117 html_tag_stack.clear();
1118 in_html_block = false;
1119 }
1120 if !in_nested_list {
1121 // Switching from content to nested list
1122 if !current_paragraph.is_empty() {
1123 blocks.push(Block::Paragraph(current_paragraph.clone()));
1124 current_paragraph.clear();
1125 }
1126 in_nested_list = true;
1127 }
1128 current_nested_list.push((content.clone(), *indent));
1129 had_preceding_blank = false; // Reset after nested list
1130 }
1131 LineType::SemanticLine(content) => {
1132 // Semantic lines are standalone - flush any current block and add as separate block
1133 if in_code {
1134 blocks.push(Block::Code {
1135 lines: current_code_block.clone(),
1136 has_preceding_blank: code_block_has_preceding_blank,
1137 });
1138 current_code_block.clear();
1139 in_code = false;
1140 } else if in_nested_list {
1141 blocks.push(Block::NestedList(current_nested_list.clone()));
1142 current_nested_list.clear();
1143 in_nested_list = false;
1144 } else if in_html_block {
1145 blocks.push(Block::Html {
1146 lines: current_html_block.clone(),
1147 has_preceding_blank: html_block_has_preceding_blank,
1148 });
1149 current_html_block.clear();
1150 html_tag_stack.clear();
1151 in_html_block = false;
1152 } else if !current_paragraph.is_empty() {
1153 blocks.push(Block::Paragraph(current_paragraph.clone()));
1154 current_paragraph.clear();
1155 }
1156 // Add semantic line as its own block
1157 blocks.push(Block::SemanticLine(content.clone()));
1158 had_preceding_blank = false; // Reset after semantic line
1159 }
1160 LineType::SnippetLine(content) => {
1161 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1162 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1163 if in_code {
1164 blocks.push(Block::Code {
1165 lines: current_code_block.clone(),
1166 has_preceding_blank: code_block_has_preceding_blank,
1167 });
1168 current_code_block.clear();
1169 in_code = false;
1170 } else if in_nested_list {
1171 blocks.push(Block::NestedList(current_nested_list.clone()));
1172 current_nested_list.clear();
1173 in_nested_list = false;
1174 } else if in_html_block {
1175 blocks.push(Block::Html {
1176 lines: current_html_block.clone(),
1177 has_preceding_blank: html_block_has_preceding_blank,
1178 });
1179 current_html_block.clear();
1180 html_tag_stack.clear();
1181 in_html_block = false;
1182 } else if !current_paragraph.is_empty() {
1183 blocks.push(Block::Paragraph(current_paragraph.clone()));
1184 current_paragraph.clear();
1185 }
1186 // Add snippet line as its own block
1187 blocks.push(Block::SnippetLine(content.clone()));
1188 had_preceding_blank = false;
1189 }
1190 LineType::DivMarker(content) => {
1191 // Div markers (::: opening or closing) are standalone structural delimiters
1192 // Flush any current block and add as separate block
1193 if in_code {
1194 blocks.push(Block::Code {
1195 lines: current_code_block.clone(),
1196 has_preceding_blank: code_block_has_preceding_blank,
1197 });
1198 current_code_block.clear();
1199 in_code = false;
1200 } else if in_nested_list {
1201 blocks.push(Block::NestedList(current_nested_list.clone()));
1202 current_nested_list.clear();
1203 in_nested_list = false;
1204 } else if in_html_block {
1205 blocks.push(Block::Html {
1206 lines: current_html_block.clone(),
1207 has_preceding_blank: html_block_has_preceding_blank,
1208 });
1209 current_html_block.clear();
1210 html_tag_stack.clear();
1211 in_html_block = false;
1212 } else if !current_paragraph.is_empty() {
1213 blocks.push(Block::Paragraph(current_paragraph.clone()));
1214 current_paragraph.clear();
1215 }
1216 blocks.push(Block::DivMarker(content.clone()));
1217 had_preceding_blank = false;
1218 }
1219 }
1220 }
1221
1222 // Push remaining block
1223 if in_code && !current_code_block.is_empty() {
1224 blocks.push(Block::Code {
1225 lines: current_code_block,
1226 has_preceding_blank: code_block_has_preceding_blank,
1227 });
1228 } else if in_nested_list && !current_nested_list.is_empty() {
1229 blocks.push(Block::NestedList(current_nested_list));
1230 } else if in_html_block && !current_html_block.is_empty() {
1231 // If we still have an unclosed HTML block, push it anyway
1232 // (malformed HTML - missing closing tag)
1233 blocks.push(Block::Html {
1234 lines: current_html_block,
1235 has_preceding_blank: html_block_has_preceding_blank,
1236 });
1237 } else if !current_paragraph.is_empty() {
1238 blocks.push(Block::Paragraph(current_paragraph));
1239 }
1240
1241 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1242 let content_lines: Vec<String> = list_item_lines
1243 .iter()
1244 .filter_map(|line| {
1245 if let LineType::Content(s) = line {
1246 Some(s.clone())
1247 } else {
1248 None
1249 }
1250 })
1251 .collect();
1252
1253 // Check if we need to reflow this list item
1254 // We check the combined content to see if it exceeds length limits
1255 let combined_content = content_lines.join(" ").trim().to_string();
1256 let full_line = format!("{marker}{combined_content}");
1257
1258 // Helper to check if we should reflow in normalize mode
1259 let should_normalize = || {
1260 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1261 // DO normalize if it has plain text content that spans multiple lines
1262 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1263 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1264 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1265 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1266 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1267 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1268
1269 // If we have structural blocks but no paragraphs, don't normalize
1270 if (has_nested_lists
1271 || has_code_blocks
1272 || has_semantic_lines
1273 || has_snippet_lines
1274 || has_div_markers)
1275 && !has_paragraphs
1276 {
1277 return false;
1278 }
1279
1280 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1281 if has_paragraphs {
1282 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1283 if paragraph_count > 1 {
1284 // Multiple paragraph blocks should be normalized
1285 return true;
1286 }
1287
1288 // Single paragraph block: normalize if it has multiple content lines
1289 if content_lines.len() > 1 {
1290 return true;
1291 }
1292 }
1293
1294 false
1295 };
1296
1297 let needs_reflow = match config.reflow_mode {
1298 ReflowMode::Normalize => {
1299 // Only reflow if:
1300 // 1. The combined line would exceed the limit, OR
1301 // 2. The list item should be normalized (has multi-line plain text)
1302 let combined_length = self.calculate_effective_length(&full_line);
1303 if combined_length > config.line_length.get() {
1304 true
1305 } else {
1306 should_normalize()
1307 }
1308 }
1309 ReflowMode::SentencePerLine => {
1310 // Check if list item has multiple sentences
1311 let sentences = split_into_sentences(&combined_content);
1312 sentences.len() > 1
1313 }
1314 ReflowMode::SemanticLineBreaks => {
1315 let sentences = split_into_sentences(&combined_content);
1316 sentences.len() > 1
1317 || (list_start..i).any(|line_idx| {
1318 self.calculate_effective_length(lines[line_idx]) > config.line_length.get()
1319 })
1320 }
1321 ReflowMode::Default => {
1322 // In default mode, only reflow if any individual line exceeds limit
1323 (list_start..i)
1324 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1325 }
1326 };
1327
1328 if needs_reflow {
1329 let start_range = line_index.whole_line_range(list_start + 1);
1330 let end_line = i - 1;
1331 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1332 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1333 } else {
1334 line_index.whole_line_range(end_line + 1)
1335 };
1336 let byte_range = start_range.start..end_range.end;
1337
1338 // Reflow each block (paragraphs only, preserve code blocks)
1339 // When line_length = 0 (no limit), use a very large value for reflow
1340 let reflow_line_length = if config.line_length.is_unlimited() {
1341 usize::MAX
1342 } else {
1343 config.line_length.get().saturating_sub(indent_size).max(1)
1344 };
1345 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1346 line_length: reflow_line_length,
1347 break_on_sentences: true,
1348 preserve_breaks: false,
1349 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1350 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1351 abbreviations: config.abbreviations_for_reflow(),
1352 };
1353
1354 let mut result: Vec<String> = Vec::new();
1355 let mut is_first_block = true;
1356
1357 for (block_idx, block) in blocks.iter().enumerate() {
1358 match block {
1359 Block::Paragraph(para_lines) => {
1360 // Split the paragraph into segments at hard break boundaries
1361 // Each segment can be reflowed independently
1362 let segments = split_into_segments(para_lines);
1363
1364 for (segment_idx, segment) in segments.iter().enumerate() {
1365 // Check if this segment ends with a hard break and what type
1366 let hard_break_type = segment.last().and_then(|line| {
1367 let line = line.strip_suffix('\r').unwrap_or(line);
1368 if line.ends_with('\\') {
1369 Some("\\")
1370 } else if line.ends_with(" ") {
1371 Some(" ")
1372 } else {
1373 None
1374 }
1375 });
1376
1377 // Join and reflow the segment (removing the hard break marker for processing)
1378 let segment_for_reflow: Vec<String> = segment
1379 .iter()
1380 .map(|line| {
1381 // Strip hard break marker (2 spaces or backslash) for reflow processing
1382 if line.ends_with('\\') {
1383 line[..line.len() - 1].trim_end().to_string()
1384 } else if line.ends_with(" ") {
1385 line[..line.len() - 2].trim_end().to_string()
1386 } else {
1387 line.clone()
1388 }
1389 })
1390 .collect();
1391
1392 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1393 if !segment_text.is_empty() {
1394 let reflowed =
1395 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1396
1397 if is_first_block && segment_idx == 0 {
1398 // First segment of first block starts with marker
1399 result.push(format!("{marker}{}", reflowed[0]));
1400 for line in reflowed.iter().skip(1) {
1401 result.push(format!("{expected_indent}{line}"));
1402 }
1403 is_first_block = false;
1404 } else {
1405 // Subsequent segments
1406 for line in reflowed {
1407 result.push(format!("{expected_indent}{line}"));
1408 }
1409 }
1410
1411 // If this segment had a hard break, add it back to the last line
1412 // Preserve the original hard break format (backslash or two spaces)
1413 if let Some(break_marker) = hard_break_type
1414 && let Some(last_line) = result.last_mut()
1415 {
1416 last_line.push_str(break_marker);
1417 }
1418 }
1419 }
1420
1421 // Add blank line after paragraph block if there's a next block
1422 // BUT: check if next block is a code block that doesn't want a preceding blank
1423 // Also don't add blank lines before snippet lines (they should stay tight)
1424 if block_idx < blocks.len() - 1 {
1425 let next_block = &blocks[block_idx + 1];
1426 let should_add_blank = match next_block {
1427 Block::Code {
1428 has_preceding_blank, ..
1429 } => *has_preceding_blank,
1430 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1431 _ => true, // For all other blocks, add blank line
1432 };
1433 if should_add_blank {
1434 result.push(String::new());
1435 }
1436 }
1437 }
1438 Block::Code {
1439 lines: code_lines,
1440 has_preceding_blank: _,
1441 } => {
1442 // Preserve code blocks as-is with original indentation
1443 // NOTE: Blank line before code block is handled by the previous block
1444 // (see paragraph block's logic above)
1445
1446 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1447 if is_first_block && idx == 0 {
1448 // First line of first block gets marker
1449 result.push(format!(
1450 "{marker}{}",
1451 " ".repeat(orig_indent - marker_len) + content
1452 ));
1453 is_first_block = false;
1454 } else if content.is_empty() {
1455 result.push(String::new());
1456 } else {
1457 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1458 }
1459 }
1460 }
1461 Block::NestedList(nested_items) => {
1462 // Preserve nested list items as-is with original indentation
1463 if !is_first_block {
1464 result.push(String::new());
1465 }
1466
1467 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1468 if is_first_block && idx == 0 {
1469 // First line of first block gets marker
1470 result.push(format!(
1471 "{marker}{}",
1472 " ".repeat(orig_indent - marker_len) + content
1473 ));
1474 is_first_block = false;
1475 } else if content.is_empty() {
1476 result.push(String::new());
1477 } else {
1478 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1479 }
1480 }
1481
1482 // Add blank line after nested list if there's a next block
1483 // Check if next block is a code block that doesn't want a preceding blank
1484 if block_idx < blocks.len() - 1 {
1485 let next_block = &blocks[block_idx + 1];
1486 let should_add_blank = match next_block {
1487 Block::Code {
1488 has_preceding_blank, ..
1489 } => *has_preceding_blank,
1490 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1491 _ => true, // For all other blocks, add blank line
1492 };
1493 if should_add_blank {
1494 result.push(String::new());
1495 }
1496 }
1497 }
1498 Block::SemanticLine(content) => {
1499 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1500 // Add blank line before if not first block
1501 if !is_first_block {
1502 result.push(String::new());
1503 }
1504
1505 if is_first_block {
1506 // First block starts with marker
1507 result.push(format!("{marker}{content}"));
1508 is_first_block = false;
1509 } else {
1510 // Subsequent blocks use expected indent
1511 result.push(format!("{expected_indent}{content}"));
1512 }
1513
1514 // Add blank line after semantic line if there's a next block
1515 // Check if next block is a code block that doesn't want a preceding blank
1516 if block_idx < blocks.len() - 1 {
1517 let next_block = &blocks[block_idx + 1];
1518 let should_add_blank = match next_block {
1519 Block::Code {
1520 has_preceding_blank, ..
1521 } => *has_preceding_blank,
1522 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1523 _ => true, // For all other blocks, add blank line
1524 };
1525 if should_add_blank {
1526 result.push(String::new());
1527 }
1528 }
1529 }
1530 Block::SnippetLine(content) => {
1531 // Preserve snippet delimiters (-8<-) as-is on their own line
1532 // Unlike semantic lines, snippet lines don't add extra blank lines
1533 if is_first_block {
1534 // First block starts with marker
1535 result.push(format!("{marker}{content}"));
1536 is_first_block = false;
1537 } else {
1538 // Subsequent blocks use expected indent
1539 result.push(format!("{expected_indent}{content}"));
1540 }
1541 // No blank lines added before or after snippet delimiters
1542 }
1543 Block::DivMarker(content) => {
1544 // Preserve div markers (::: opening or closing) as-is on their own line
1545 if is_first_block {
1546 result.push(format!("{marker}{content}"));
1547 is_first_block = false;
1548 } else {
1549 result.push(format!("{expected_indent}{content}"));
1550 }
1551 }
1552 Block::Html {
1553 lines: html_lines,
1554 has_preceding_blank: _,
1555 } => {
1556 // Preserve HTML blocks exactly as-is with original indentation
1557 // NOTE: Blank line before HTML block is handled by the previous block
1558
1559 for (idx, line) in html_lines.iter().enumerate() {
1560 if is_first_block && idx == 0 {
1561 // First line of first block gets marker
1562 result.push(format!("{marker}{line}"));
1563 is_first_block = false;
1564 } else if line.is_empty() {
1565 // Preserve blank lines inside HTML blocks
1566 result.push(String::new());
1567 } else {
1568 // Preserve lines with their original content (already includes indentation)
1569 result.push(format!("{expected_indent}{line}"));
1570 }
1571 }
1572
1573 // Add blank line after HTML block if there's a next block
1574 if block_idx < blocks.len() - 1 {
1575 let next_block = &blocks[block_idx + 1];
1576 let should_add_blank = match next_block {
1577 Block::Code {
1578 has_preceding_blank, ..
1579 } => *has_preceding_blank,
1580 Block::Html {
1581 has_preceding_blank, ..
1582 } => *has_preceding_blank,
1583 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1584 _ => true, // For all other blocks, add blank line
1585 };
1586 if should_add_blank {
1587 result.push(String::new());
1588 }
1589 }
1590 }
1591 }
1592 }
1593
1594 let reflowed_text = result.join("\n");
1595
1596 // Preserve trailing newline
1597 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1598 format!("{reflowed_text}\n")
1599 } else {
1600 reflowed_text
1601 };
1602
1603 // Get the original text to compare
1604 let original_text = &ctx.content[byte_range.clone()];
1605
1606 // Only generate a warning if the replacement is different from the original
1607 if original_text != replacement {
1608 // Generate an appropriate message based on why reflow is needed
1609 let message = match config.reflow_mode {
1610 ReflowMode::SentencePerLine => {
1611 let num_sentences = split_into_sentences(&combined_content).len();
1612 let num_lines = content_lines.len();
1613 if num_lines == 1 {
1614 // Single line with multiple sentences
1615 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1616 } else {
1617 // Multiple lines - could be split sentences or mixed
1618 format!(
1619 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1620 )
1621 }
1622 }
1623 ReflowMode::SemanticLineBreaks => {
1624 let num_sentences = split_into_sentences(&combined_content).len();
1625 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
1626 }
1627 ReflowMode::Normalize => {
1628 let combined_length = self.calculate_effective_length(&full_line);
1629 if combined_length > config.line_length.get() {
1630 format!(
1631 "Line length {} exceeds {} characters",
1632 combined_length,
1633 config.line_length.get()
1634 )
1635 } else {
1636 "Multi-line content can be normalized".to_string()
1637 }
1638 }
1639 ReflowMode::Default => {
1640 let combined_length = self.calculate_effective_length(&full_line);
1641 format!(
1642 "Line length {} exceeds {} characters",
1643 combined_length,
1644 config.line_length.get()
1645 )
1646 }
1647 };
1648
1649 warnings.push(LintWarning {
1650 rule_name: Some(self.name().to_string()),
1651 message,
1652 line: list_start + 1,
1653 column: 1,
1654 end_line: end_line + 1,
1655 end_column: lines[end_line].len() + 1,
1656 severity: Severity::Warning,
1657 fix: Some(crate::rule::Fix {
1658 range: byte_range,
1659 replacement,
1660 }),
1661 });
1662 }
1663 }
1664 continue;
1665 }
1666
1667 // Found start of a paragraph - collect all lines in it
1668 let paragraph_start = i;
1669 let mut paragraph_lines = vec![lines[i]];
1670 i += 1;
1671
1672 while i < lines.len() {
1673 let next_line = lines[i];
1674 let next_line_num = i + 1;
1675 let next_trimmed = next_line.trim();
1676
1677 // Stop at paragraph boundaries
1678 if next_trimmed.is_empty()
1679 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1680 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1681 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1682 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1683 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1684 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1685 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1686 || ctx
1687 .line_info(next_line_num)
1688 .is_some_and(|info| info.in_mkdocs_container())
1689 || (next_line_num > 0
1690 && next_line_num <= ctx.lines.len()
1691 && ctx.lines[next_line_num - 1].blockquote.is_some())
1692 || next_trimmed.starts_with('#')
1693 || TableUtils::is_potential_table_row(next_line)
1694 || is_list_item(next_trimmed)
1695 || is_horizontal_rule(next_trimmed)
1696 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1697 || is_template_directive_only(next_line)
1698 || is_standalone_attr_list(next_line)
1699 || is_snippet_block_delimiter(next_line)
1700 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
1701 {
1702 break;
1703 }
1704
1705 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1706 if i > 0 && has_hard_break(lines[i - 1]) {
1707 // Don't include lines after hard breaks in the same paragraph
1708 break;
1709 }
1710
1711 paragraph_lines.push(next_line);
1712 i += 1;
1713 }
1714
1715 // Combine paragraph lines into a single string for processing
1716 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1717 let paragraph_text = paragraph_lines.join(" ");
1718
1719 // Skip reflowing if this paragraph contains definition list items
1720 // Definition lists are multi-line structures that should not be joined
1721 let contains_definition_list = paragraph_lines
1722 .iter()
1723 .any(|line| crate::utils::is_definition_list_item(line));
1724
1725 if contains_definition_list {
1726 // Don't reflow definition lists - skip this paragraph
1727 i = paragraph_start + paragraph_lines.len();
1728 continue;
1729 }
1730
1731 // Skip reflowing if this paragraph contains MkDocs Snippets markers
1732 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1733 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1734
1735 if contains_snippets {
1736 // Don't reflow Snippets blocks - skip this paragraph
1737 i = paragraph_start + paragraph_lines.len();
1738 continue;
1739 }
1740
1741 // Check if this paragraph needs reflowing
1742 let needs_reflow = match config.reflow_mode {
1743 ReflowMode::Normalize => {
1744 // In normalize mode, reflow multi-line paragraphs
1745 paragraph_lines.len() > 1
1746 }
1747 ReflowMode::SentencePerLine => {
1748 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1749 // Note: we check the joined text because sentences can span multiple lines
1750 let sentences = split_into_sentences(¶graph_text);
1751
1752 // Always reflow if multiple sentences on one line
1753 if sentences.len() > 1 {
1754 true
1755 } else if paragraph_lines.len() > 1 {
1756 // For single-sentence paragraphs spanning multiple lines:
1757 // Reflow if they COULD fit on one line (respecting line-length constraint)
1758 if config.line_length.is_unlimited() {
1759 // No line-length constraint - always join single sentences
1760 true
1761 } else {
1762 // Only join if it fits within line-length
1763 let effective_length = self.calculate_effective_length(¶graph_text);
1764 effective_length <= config.line_length.get()
1765 }
1766 } else {
1767 false
1768 }
1769 }
1770 ReflowMode::SemanticLineBreaks => {
1771 let sentences = split_into_sentences(¶graph_text);
1772 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
1773 sentences.len() > 1
1774 || paragraph_lines.len() > 1
1775 || paragraph_lines
1776 .iter()
1777 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1778 }
1779 ReflowMode::Default => {
1780 // In default mode, only reflow if lines exceed limit
1781 paragraph_lines
1782 .iter()
1783 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1784 }
1785 };
1786
1787 if needs_reflow {
1788 // Calculate byte range for this paragraph
1789 // Use whole_line_range for each line and combine
1790 let start_range = line_index.whole_line_range(paragraph_start + 1);
1791 let end_line = paragraph_start + paragraph_lines.len() - 1;
1792
1793 // For the last line, we want to preserve any trailing newline
1794 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1795 // Last line without trailing newline - use line_text_range
1796 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1797 } else {
1798 // Not the last line or has trailing newline - use whole_line_range
1799 line_index.whole_line_range(end_line + 1)
1800 };
1801
1802 let byte_range = start_range.start..end_range.end;
1803
1804 // Check if the paragraph ends with a hard break and what type
1805 let hard_break_type = paragraph_lines.last().and_then(|line| {
1806 let line = line.strip_suffix('\r').unwrap_or(line);
1807 if line.ends_with('\\') {
1808 Some("\\")
1809 } else if line.ends_with(" ") {
1810 Some(" ")
1811 } else {
1812 None
1813 }
1814 });
1815
1816 // Reflow the paragraph
1817 // When line_length = 0 (no limit), use a very large value for reflow
1818 let reflow_line_length = if config.line_length.is_unlimited() {
1819 usize::MAX
1820 } else {
1821 config.line_length.get()
1822 };
1823 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1824 line_length: reflow_line_length,
1825 break_on_sentences: true,
1826 preserve_breaks: false,
1827 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1828 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1829 abbreviations: config.abbreviations_for_reflow(),
1830 };
1831 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1832
1833 // If the original paragraph ended with a hard break, preserve it
1834 // Preserve the original hard break format (backslash or two spaces)
1835 if let Some(break_marker) = hard_break_type
1836 && !reflowed.is_empty()
1837 {
1838 let last_idx = reflowed.len() - 1;
1839 if !has_hard_break(&reflowed[last_idx]) {
1840 reflowed[last_idx].push_str(break_marker);
1841 }
1842 }
1843
1844 let reflowed_text = reflowed.join("\n");
1845
1846 // Preserve trailing newline if the original paragraph had one
1847 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1848 format!("{reflowed_text}\n")
1849 } else {
1850 reflowed_text
1851 };
1852
1853 // Get the original text to compare
1854 let original_text = &ctx.content[byte_range.clone()];
1855
1856 // Only generate a warning if the replacement is different from the original
1857 if original_text != replacement {
1858 // Create warning with actual fix
1859 // In default mode, report the specific line that violates
1860 // In normalize mode, report the whole paragraph
1861 // In sentence-per-line mode, report the entire paragraph
1862 let (warning_line, warning_end_line) = match config.reflow_mode {
1863 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1864 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
1865 // Highlight the entire paragraph that needs reformatting
1866 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1867 }
1868 ReflowMode::Default => {
1869 // Find the first line that exceeds the limit
1870 let mut violating_line = paragraph_start;
1871 for (idx, line) in paragraph_lines.iter().enumerate() {
1872 if self.calculate_effective_length(line) > config.line_length.get() {
1873 violating_line = paragraph_start + idx;
1874 break;
1875 }
1876 }
1877 (violating_line + 1, violating_line + 1)
1878 }
1879 };
1880
1881 warnings.push(LintWarning {
1882 rule_name: Some(self.name().to_string()),
1883 message: match config.reflow_mode {
1884 ReflowMode::Normalize => format!(
1885 "Paragraph could be normalized to use line length of {} characters",
1886 config.line_length.get()
1887 ),
1888 ReflowMode::SentencePerLine => {
1889 let num_sentences = split_into_sentences(¶graph_text).len();
1890 if paragraph_lines.len() == 1 {
1891 // Single line with multiple sentences
1892 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1893 } else {
1894 let num_lines = paragraph_lines.len();
1895 // Multiple lines - could be split sentences or mixed
1896 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1897 }
1898 },
1899 ReflowMode::SemanticLineBreaks => {
1900 let num_sentences = split_into_sentences(¶graph_text).len();
1901 format!(
1902 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
1903 )
1904 },
1905 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1906 },
1907 line: warning_line,
1908 column: 1,
1909 end_line: warning_end_line,
1910 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1911 severity: Severity::Warning,
1912 fix: Some(crate::rule::Fix {
1913 range: byte_range,
1914 replacement,
1915 }),
1916 });
1917 }
1918 }
1919 }
1920
1921 warnings
1922 }
1923
1924 /// Calculate string length based on the configured length mode
1925 fn calculate_string_length(&self, s: &str) -> usize {
1926 match self.config.length_mode {
1927 LengthMode::Chars => s.chars().count(),
1928 LengthMode::Visual => s.width(),
1929 LengthMode::Bytes => s.len(),
1930 }
1931 }
1932
1933 /// Calculate effective line length
1934 ///
1935 /// Returns the actual display length of the line using the configured length mode.
1936 fn calculate_effective_length(&self, line: &str) -> usize {
1937 self.calculate_string_length(line)
1938 }
1939
1940 /// Calculate line length with inline link/image URLs removed.
1941 ///
1942 /// For each inline link `[text](url)` or image `` on the line,
1943 /// computes the "savings" from removing the URL portion (keeping only `[text]`
1944 /// or `![alt]`). Returns `effective_length - total_savings`.
1945 ///
1946 /// Handles nested constructs (e.g., `[](url)`) by only counting the
1947 /// outermost construct to avoid double-counting.
1948 fn calculate_text_only_length(
1949 &self,
1950 effective_length: usize,
1951 line_number: usize,
1952 ctx: &crate::lint_context::LintContext,
1953 ) -> usize {
1954 let line_range = ctx.line_index.line_content_range(line_number);
1955 let line_byte_end = line_range.end;
1956
1957 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
1958 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
1959
1960 for link in &ctx.links {
1961 if link.line != line_number || link.is_reference {
1962 continue;
1963 }
1964 if !matches!(link.link_type, LinkType::Inline) {
1965 continue;
1966 }
1967 // Skip cross-line links
1968 if link.byte_end > line_byte_end {
1969 continue;
1970 }
1971 // `[text]` in configured length mode
1972 let text_only_len = 2 + self.calculate_string_length(&link.text);
1973 constructs.push((link.byte_offset, link.byte_end, text_only_len));
1974 }
1975
1976 for image in &ctx.images {
1977 if image.line != line_number || image.is_reference {
1978 continue;
1979 }
1980 if !matches!(image.link_type, LinkType::Inline) {
1981 continue;
1982 }
1983 // Skip cross-line images
1984 if image.byte_end > line_byte_end {
1985 continue;
1986 }
1987 // `![alt]` in configured length mode
1988 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
1989 constructs.push((image.byte_offset, image.byte_end, text_only_len));
1990 }
1991
1992 if constructs.is_empty() {
1993 return effective_length;
1994 }
1995
1996 // Sort by byte offset to handle overlapping/nested constructs
1997 constructs.sort_by_key(|&(start, _, _)| start);
1998
1999 let mut total_savings: usize = 0;
2000 let mut last_end: usize = 0;
2001
2002 for (start, end, text_only_len) in &constructs {
2003 // Skip constructs nested inside a previously counted one
2004 if *start < last_end {
2005 continue;
2006 }
2007 // Full construct length in configured length mode
2008 let full_source = &ctx.content[*start..*end];
2009 let full_len = self.calculate_string_length(full_source);
2010 total_savings += full_len.saturating_sub(*text_only_len);
2011 last_end = *end;
2012 }
2013
2014 effective_length.saturating_sub(total_savings)
2015 }
2016}