rumdl_lib/rules/md013_line_length/
mod.rs

1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
7use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
8use crate::utils::range_utils::LineIndex;
9use crate::utils::range_utils::calculate_excess_range;
10use crate::utils::regex_cache::{
11    IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
12};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::split_into_sentences;
15use toml;
16
17mod helpers;
18pub mod md013_config;
19use helpers::{
20    extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
21    split_into_segments, trim_preserving_hard_break,
22};
23pub use md013_config::MD013Config;
24use md013_config::{LengthMode, ReflowMode};
25
26#[cfg(test)]
27mod tests;
28use unicode_width::UnicodeWidthStr;
29
/// Rule MD013: flags lines that exceed the configured maximum length.
///
/// The struct carries nothing but the rule's configuration; all behavior lives
/// in its inherent methods and its `Rule` implementation.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings that drive the rule (line limit, element toggles, reflow options).
    pub(crate) config: MD013Config,
}
34
35impl MD013LineLength {
36    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
37        Self {
38            config: MD013Config {
39                line_length: crate::types::LineLength::new(line_length),
40                code_blocks,
41                tables,
42                headings,
43                paragraphs: true, // Default to true for backwards compatibility
44                strict,
45                reflow: false,
46                reflow_mode: ReflowMode::default(),
47                length_mode: LengthMode::default(),
48                abbreviations: Vec::new(),
49            },
50        }
51    }
52
53    pub fn from_config_struct(config: MD013Config) -> Self {
54        Self { config }
55    }
56
57    fn should_ignore_line(
58        &self,
59        line: &str,
60        _lines: &[&str],
61        current_line: usize,
62        ctx: &crate::lint_context::LintContext,
63    ) -> bool {
64        if self.config.strict {
65            return false;
66        }
67
68        // Quick check for common patterns before expensive regex
69        let trimmed = line.trim();
70
71        // Only skip if the entire line is a URL (quick check first)
72        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
73            return true;
74        }
75
76        // Only skip if the entire line is an image reference (quick check first)
77        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
78            return true;
79        }
80
81        // Only skip if the entire line is a link reference (quick check first)
82        if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
83            return true;
84        }
85
86        // Code blocks with long strings (only check if in code block)
87        if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
88            && !trimmed.is_empty()
89            && !line.contains(' ')
90            && !line.contains('\t')
91        {
92            return true;
93        }
94
95        false
96    }
97
98    /// Check if rule should skip based on provided config (used for inline config support)
99    fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
100        // Skip if content is empty
101        if ctx.content.is_empty() {
102            return true;
103        }
104
105        // For sentence-per-line or normalize mode, never skip based on line length
106        if config.reflow
107            && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
108        {
109            return false;
110        }
111
112        // Quick check: if total content is shorter than line limit, definitely skip
113        if ctx.content.len() <= config.line_length.get() {
114            return true;
115        }
116
117        // Skip if no line exceeds the limit
118        !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
119    }
120}
121
122impl Rule for MD013LineLength {
    /// Returns the rule identifier, `"MD013"`.
    fn name(&self) -> &'static str {
        "MD013"
    }
126
    /// Returns the one-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Line length should not be excessive"
    }
130
131    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
132        let content = ctx.content;
133
134        // Parse inline configuration FIRST so we can use effective config for should_skip
135        let inline_config = crate::inline_config::InlineConfig::from_content(content);
136        let config_override = inline_config.get_rule_config("MD013");
137
138        // Apply configuration override if present
139        let effective_config = if let Some(json_config) = config_override {
140            if let Some(obj) = json_config.as_object() {
141                let mut config = self.config.clone();
142                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
143                    config.line_length = crate::types::LineLength::new(line_length as usize);
144                }
145                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
146                    config.code_blocks = code_blocks;
147                }
148                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
149                    config.tables = tables;
150                }
151                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
152                    config.headings = headings;
153                }
154                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
155                    config.strict = strict;
156                }
157                if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
158                    config.reflow = reflow;
159                }
160                if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
161                    config.reflow_mode = match reflow_mode {
162                        "default" => ReflowMode::Default,
163                        "normalize" => ReflowMode::Normalize,
164                        "sentence-per-line" => ReflowMode::SentencePerLine,
165                        _ => ReflowMode::default(),
166                    };
167                }
168                config
169            } else {
170                self.config.clone()
171            }
172        } else {
173            self.config.clone()
174        };
175
176        // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
177        // But don't skip if we're in reflow mode with Normalize or SentencePerLine
178        if self.should_skip_with_config(ctx, &effective_config)
179            && !(effective_config.reflow
180                && (effective_config.reflow_mode == ReflowMode::Normalize
181                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
182        {
183            return Ok(Vec::new());
184        }
185
186        // Direct implementation without DocumentStructure
187        let mut warnings = Vec::new();
188
189        // Special handling: line_length = 0 means "no line length limit"
190        // Skip all line length checks, but still allow reflow if enabled
191        let skip_length_checks = effective_config.line_length.is_unlimited();
192
193        // Pre-filter lines that could be problematic to avoid processing all lines
194        let mut candidate_lines = Vec::new();
195        if !skip_length_checks {
196            for (line_idx, line_info) in ctx.lines.iter().enumerate() {
197                // Skip front matter - it should never be linted
198                if line_info.in_front_matter {
199                    continue;
200                }
201
202                // Quick length check first
203                if line_info.byte_len > effective_config.line_length.get() {
204                    candidate_lines.push(line_idx);
205                }
206            }
207        }
208
209        // If no candidate lines and not in normalize or sentence-per-line mode, early return
210        if candidate_lines.is_empty()
211            && !(effective_config.reflow
212                && (effective_config.reflow_mode == ReflowMode::Normalize
213                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
214        {
215            return Ok(warnings);
216        }
217
218        // Use ctx.lines if available for better performance
219        let lines: Vec<&str> = if !ctx.lines.is_empty() {
220            ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
221        } else {
222            content.lines().collect()
223        };
224
225        // Create a quick lookup set for heading lines
226        // We need this for both the heading skip check AND the paragraphs check
227        let heading_lines_set: std::collections::HashSet<usize> = ctx
228            .lines
229            .iter()
230            .enumerate()
231            .filter(|(_, line)| line.heading.is_some())
232            .map(|(idx, _)| idx + 1)
233            .collect();
234
235        // Use pre-computed table blocks from context
236        // We need this for both the table skip check AND the paragraphs check
237        let table_blocks = &ctx.table_blocks;
238        let mut table_lines_set = std::collections::HashSet::new();
239        for table in table_blocks {
240            table_lines_set.insert(table.header_line + 1);
241            table_lines_set.insert(table.delimiter_line + 1);
242            for &line in &table.content_lines {
243                table_lines_set.insert(line + 1);
244            }
245        }
246
247        // Process candidate lines for line length checks
248        for &line_idx in &candidate_lines {
249            let line_number = line_idx + 1;
250            let line = lines[line_idx];
251
252            // Calculate effective length excluding unbreakable URLs
253            let effective_length = self.calculate_effective_length(line);
254
255            // Use single line length limit for all content
256            let line_limit = effective_config.line_length.get();
257
258            // Skip short lines immediately (double-check after effective length calculation)
259            if effective_length <= line_limit {
260                continue;
261            }
262
263            // Skip mkdocstrings blocks (already handled by LintContext)
264            if ctx.lines[line_idx].in_mkdocstrings {
265                continue;
266            }
267
268            // Skip various block types efficiently
269            if !effective_config.strict {
270                // Skip setext heading underlines
271                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
272                    continue;
273                }
274
275                // Skip block elements according to config flags
276                // The flags mean: true = check these elements, false = skip these elements
277                // So we skip when the flag is FALSE and the line is in that element type
278                if (!effective_config.headings && heading_lines_set.contains(&line_number))
279                    || (!effective_config.code_blocks
280                        && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
281                    || (!effective_config.tables && table_lines_set.contains(&line_number))
282                    || ctx.lines[line_number - 1].blockquote.is_some()
283                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
284                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
285                    || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
286                    || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
287                    || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
288                {
289                    continue;
290                }
291
292                // Check if this is a paragraph/regular text line
293                // If paragraphs = false, skip lines that are NOT in special blocks
294                if !effective_config.paragraphs {
295                    let is_special_block = heading_lines_set.contains(&line_number)
296                        || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
297                        || table_lines_set.contains(&line_number)
298                        || ctx.lines[line_number - 1].blockquote.is_some()
299                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
300                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
301                        || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
302                        || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
303                        || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
304                        || ctx
305                            .line_info(line_number)
306                            .is_some_and(|info| info.in_mkdocs_container());
307
308                    // Skip regular paragraph text when paragraphs = false
309                    if !is_special_block {
310                        continue;
311                    }
312                }
313
314                // Skip lines that are only a URL, image ref, or link ref
315                if self.should_ignore_line(line, &lines, line_idx, ctx) {
316                    continue;
317                }
318            }
319
320            // In sentence-per-line mode, check if this is a single long sentence
321            // If so, emit a warning without a fix (user must manually rephrase)
322            if effective_config.reflow_mode == ReflowMode::SentencePerLine {
323                let sentences = split_into_sentences(line.trim());
324                if sentences.len() == 1 {
325                    // Single sentence that's too long - warn but don't auto-fix
326                    let message = format!("Line length {effective_length} exceeds {line_limit} characters");
327
328                    let (start_line, start_col, end_line, end_col) =
329                        calculate_excess_range(line_number, line, line_limit);
330
331                    warnings.push(LintWarning {
332                        rule_name: Some(self.name().to_string()),
333                        message,
334                        line: start_line,
335                        column: start_col,
336                        end_line,
337                        end_column: end_col,
338                        severity: Severity::Warning,
339                        fix: None, // No auto-fix for long single sentences
340                    });
341                    continue;
342                }
343                // Multiple sentences will be handled by paragraph-based reflow
344                continue;
345            }
346
347            // Don't provide fix for individual lines when reflow is enabled
348            // Paragraph-based fixes will be handled separately
349            let fix = None;
350
351            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
352
353            // Calculate precise character range for the excess portion
354            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
355
356            warnings.push(LintWarning {
357                rule_name: Some(self.name().to_string()),
358                message,
359                line: start_line,
360                column: start_col,
361                end_line,
362                end_column: end_col,
363                severity: Severity::Warning,
364                fix,
365            });
366        }
367
368        // If reflow is enabled, generate paragraph-based fixes
369        if effective_config.reflow {
370            let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
371            // Merge paragraph warnings with line warnings, removing duplicates
372            for pw in paragraph_warnings {
373                // Remove any line warnings that overlap with this paragraph
374                warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
375                warnings.push(pw);
376            }
377        }
378
379        Ok(warnings)
380    }
381
382    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
383        // For CLI usage, apply fixes from warnings
384        // LSP will use the warning-based fixes directly
385        let warnings = self.check(ctx)?;
386
387        // If there are no fixes, return content unchanged
388        if !warnings.iter().any(|w| w.fix.is_some()) {
389            return Ok(ctx.content.to_string());
390        }
391
392        // Apply warning-based fixes
393        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
394            .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
395    }
396
    /// Exposes the rule as `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
400
    /// Classifies the rule under `RuleCategory::Whitespace`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
404
    /// Reports whether the document can be skipped, judged against the rule's own
    /// configuration (`self.config`) via `should_skip_with_config`.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        self.should_skip_with_config(ctx, &self.config)
    }
408
409    fn default_config_section(&self) -> Option<(String, toml::Value)> {
410        let default_config = MD013Config::default();
411        let json_value = serde_json::to_value(&default_config).ok()?;
412        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
413
414        if let toml::Value::Table(table) = toml_value {
415            if !table.is_empty() {
416                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
417            } else {
418                None
419            }
420        } else {
421            None
422        }
423    }
424
425    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
426        let mut aliases = std::collections::HashMap::new();
427        aliases.insert("enable_reflow".to_string(), "reflow".to_string());
428        Some(aliases)
429    }
430
431    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
432    where
433        Self: Sized,
434    {
435        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
436        // Use global line_length if rule-specific config still has default value
437        if rule_config.line_length.get() == 80 {
438            rule_config.line_length = config.global.line_length;
439        }
440        Box::new(Self::from_config_struct(rule_config))
441    }
442}
443
444impl MD013LineLength {
445    /// Generate paragraph-based fixes
446    fn generate_paragraph_fixes(
447        &self,
448        ctx: &crate::lint_context::LintContext,
449        config: &MD013Config,
450        lines: &[&str],
451    ) -> Vec<LintWarning> {
452        let mut warnings = Vec::new();
453        let line_index = LineIndex::new(ctx.content);
454
455        let mut i = 0;
456        while i < lines.len() {
457            let line_num = i + 1;
458
459            // Skip special structures (but NOT MkDocs containers - those get special handling)
460            let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
461                info.in_code_block
462                    || info.in_front_matter
463                    || info.in_html_block
464                    || info.in_html_comment
465                    || info.in_esm_block
466                    || info.in_jsx_expression
467                    || info.in_mdx_comment
468            });
469
470            if should_skip_due_to_line_info
471                || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
472                || lines[i].trim().starts_with('#')
473                || TableUtils::is_potential_table_row(lines[i])
474                || lines[i].trim().is_empty()
475                || is_horizontal_rule(lines[i].trim())
476                || is_template_directive_only(lines[i])
477            {
478                i += 1;
479                continue;
480            }
481
482            // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
483            if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
484                let container_start = i;
485
486                // Detect the actual indent level from the first content line
487                // (supports nested admonitions with 8+ spaces)
488                let first_line = lines[i];
489                let base_indent_len = first_line.len() - first_line.trim_start().len();
490                let base_indent: String = " ".repeat(base_indent_len);
491
492                // Collect consecutive MkDocs container paragraph lines
493                let mut container_lines: Vec<&str> = Vec::new();
494                while i < lines.len() {
495                    let current_line_num = i + 1;
496                    let line_info = ctx.line_info(current_line_num);
497
498                    // Stop if we leave the MkDocs container
499                    if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
500                        break;
501                    }
502
503                    let line = lines[i];
504
505                    // Stop at paragraph boundaries within the container
506                    if line.trim().is_empty() {
507                        break;
508                    }
509
510                    // Skip list items, code blocks, headings within containers
511                    if is_list_item(line.trim())
512                        || line.trim().starts_with("```")
513                        || line.trim().starts_with("~~~")
514                        || line.trim().starts_with('#')
515                    {
516                        break;
517                    }
518
519                    container_lines.push(line);
520                    i += 1;
521                }
522
523                if container_lines.is_empty() {
524                    continue;
525                }
526
527                // Strip the base indent from each line and join for reflow
528                let stripped_lines: Vec<&str> = container_lines
529                    .iter()
530                    .map(|line| {
531                        if line.starts_with(&base_indent) {
532                            &line[base_indent_len..]
533                        } else {
534                            line.trim_start()
535                        }
536                    })
537                    .collect();
538                let paragraph_text = stripped_lines.join(" ");
539
540                // Check if reflow is needed
541                let needs_reflow = match config.reflow_mode {
542                    ReflowMode::Normalize => container_lines.len() > 1,
543                    ReflowMode::SentencePerLine => {
544                        let sentences = split_into_sentences(&paragraph_text);
545                        sentences.len() > 1 || container_lines.len() > 1
546                    }
547                    ReflowMode::Default => container_lines
548                        .iter()
549                        .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
550                };
551
552                if !needs_reflow {
553                    continue;
554                }
555
556                // Calculate byte range for this container paragraph
557                let start_range = line_index.whole_line_range(container_start + 1);
558                let end_line = container_start + container_lines.len() - 1;
559                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
560                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
561                } else {
562                    line_index.whole_line_range(end_line + 1)
563                };
564                let byte_range = start_range.start..end_range.end;
565
566                // Reflow with adjusted line length (accounting for the 4-space indent)
567                let reflow_line_length = if config.line_length.is_unlimited() {
568                    usize::MAX
569                } else {
570                    config.line_length.get().saturating_sub(base_indent_len).max(1)
571                };
572                let reflow_options = crate::utils::text_reflow::ReflowOptions {
573                    line_length: reflow_line_length,
574                    break_on_sentences: true,
575                    preserve_breaks: false,
576                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
577                    abbreviations: config.abbreviations_for_reflow(),
578                };
579                let reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
580
581                // Re-add the 4-space indent to each reflowed line
582                let reflowed_with_indent: Vec<String> =
583                    reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
584                let reflowed_text = reflowed_with_indent.join("\n");
585
586                // Preserve trailing newline
587                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
588                    format!("{reflowed_text}\n")
589                } else {
590                    reflowed_text
591                };
592
593                // Only generate a warning if the replacement is different
594                let original_text = &ctx.content[byte_range.clone()];
595                if original_text != replacement {
596                    warnings.push(LintWarning {
597                        rule_name: Some(self.name().to_string()),
598                        message: format!(
599                            "Line length {} exceeds {} characters (in MkDocs container)",
600                            container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
601                            config.line_length.get()
602                        ),
603                        line: container_start + 1,
604                        column: 1,
605                        end_line: end_line + 1,
606                        end_column: lines[end_line].len() + 1,
607                        severity: Severity::Warning,
608                        fix: Some(crate::rule::Fix {
609                            range: byte_range,
610                            replacement,
611                        }),
612                    });
613                }
614                continue;
615            }
616
617            // Helper function to detect semantic line markers
618            let is_semantic_line = |content: &str| -> bool {
619                let trimmed = content.trim_start();
620                let semantic_markers = [
621                    "NOTE:",
622                    "WARNING:",
623                    "IMPORTANT:",
624                    "CAUTION:",
625                    "TIP:",
626                    "DANGER:",
627                    "HINT:",
628                    "INFO:",
629                ];
630                semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
631            };
632
633            // Helper function to detect fence markers (opening or closing)
634            let is_fence_marker = |content: &str| -> bool {
635                let trimmed = content.trim_start();
636                trimmed.starts_with("```") || trimmed.starts_with("~~~")
637            };
638
639            // Check if this is a list item - handle it specially
640            let trimmed = lines[i].trim();
641            if is_list_item(trimmed) {
642                // Collect the entire list item including continuation lines
643                let list_start = i;
644                let (marker, first_content) = extract_list_marker_and_content(lines[i]);
645                let marker_len = marker.len();
646
647                // Track lines and their types (content, code block, fence, nested list)
648                #[derive(Clone)]
649                enum LineType {
650                    Content(String),
651                    CodeBlock(String, usize),      // content and original indent
652                    NestedListItem(String, usize), // full line content and original indent
653                    SemanticLine(String),          // Lines starting with NOTE:, WARNING:, etc that should stay separate
654                    SnippetLine(String),           // MkDocs Snippets delimiters (-8<-) that must stay on their own line
655                    Empty,
656                }
657
658                let mut actual_indent: Option<usize> = None;
659                let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
660                i += 1;
661
662                // Collect continuation lines using ctx.lines for metadata
663                while i < lines.len() {
664                    let line_info = &ctx.lines[i];
665
666                    // Use pre-computed is_blank from ctx
667                    if line_info.is_blank {
668                        // Empty line - check if next line is indented (part of list item)
669                        if i + 1 < lines.len() {
670                            let next_info = &ctx.lines[i + 1];
671
672                            // Check if next line is indented enough to be continuation
673                            if !next_info.is_blank && next_info.indent >= marker_len {
674                                // This blank line is between paragraphs/blocks in the list item
675                                list_item_lines.push(LineType::Empty);
676                                i += 1;
677                                continue;
678                            }
679                        }
680                        // No indented line after blank, end of list item
681                        break;
682                    }
683
684                    // Use pre-computed indent from ctx
685                    let indent = line_info.indent;
686
687                    // Valid continuation must be indented at least marker_len
688                    if indent >= marker_len {
689                        let trimmed = line_info.content(ctx.content).trim();
690
691                        // Use pre-computed in_code_block from ctx
692                        if line_info.in_code_block {
693                            list_item_lines.push(LineType::CodeBlock(
694                                line_info.content(ctx.content)[indent..].to_string(),
695                                indent,
696                            ));
697                            i += 1;
698                            continue;
699                        }
700
701                        // Check if this is a SIBLING list item (breaks parent)
702                        // Nested lists are indented >= marker_len and are PART of the parent item
703                        // Siblings are at indent < marker_len (at or before parent marker)
704                        if is_list_item(trimmed) && indent < marker_len {
705                            // This is a sibling item at same or higher level - end parent item
706                            break;
707                        }
708
709                        // Check if this is a NESTED list item marker
710                        // Nested lists should be processed separately UNLESS they're part of a
711                        // multi-paragraph list item (indicated by a blank line before them OR
712                        // it's a continuation of an already-started nested list)
713                        if is_list_item(trimmed) && indent >= marker_len {
714                            // Check if there was a blank line before this (multi-paragraph context)
715                            let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
716
717                            // Check if we've already seen nested list content (another nested item)
718                            let has_nested_content = list_item_lines.iter().any(|line| {
719                                matches!(line, LineType::Content(c) if is_list_item(c.trim()))
720                                    || matches!(line, LineType::NestedListItem(_, _))
721                            });
722
723                            if !has_blank_before && !has_nested_content {
724                                // Single-paragraph context with no prior nested items: starts a new item
725                                // End parent collection; nested list will be processed next
726                                break;
727                            }
728                            // else: multi-paragraph context or continuation of nested list, keep collecting
729                            // Mark this as a nested list item to preserve its structure
730                            list_item_lines.push(LineType::NestedListItem(
731                                line_info.content(ctx.content)[indent..].to_string(),
732                                indent,
733                            ));
734                            i += 1;
735                            continue;
736                        }
737
738                        // Normal continuation: marker_len to marker_len+3
739                        if indent <= marker_len + 3 {
740                            // Set actual_indent from first non-code continuation if not set
741                            if actual_indent.is_none() {
742                                actual_indent = Some(indent);
743                            }
744
745                            // Extract content (remove indentation and trailing whitespace)
746                            // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
747                            // See: https://github.com/rvben/rumdl/issues/76
748                            let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
749
750                            // Check if this is a fence marker (opening or closing)
751                            // These should be treated as code block lines, not paragraph content
752                            if is_fence_marker(&content) {
753                                list_item_lines.push(LineType::CodeBlock(content, indent));
754                            }
755                            // Check if this is a semantic line (NOTE:, WARNING:, etc.)
756                            else if is_semantic_line(&content) {
757                                list_item_lines.push(LineType::SemanticLine(content));
758                            }
759                            // Check if this is a snippet block delimiter (-8<- or --8<--)
760                            // These must be preserved on their own lines for MkDocs Snippets extension
761                            else if is_snippet_block_delimiter(&content) {
762                                list_item_lines.push(LineType::SnippetLine(content));
763                            } else {
764                                list_item_lines.push(LineType::Content(content));
765                            }
766                            i += 1;
767                        } else {
768                            // indent >= marker_len + 4: indented code block
769                            list_item_lines.push(LineType::CodeBlock(
770                                line_info.content(ctx.content)[indent..].to_string(),
771                                indent,
772                            ));
773                            i += 1;
774                        }
775                    } else {
776                        // Not indented enough, end of list item
777                        break;
778                    }
779                }
780
781                // Use detected indent or fallback to marker length
782                let indent_size = actual_indent.unwrap_or(marker_len);
783                let expected_indent = " ".repeat(indent_size);
784
                // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines,
                // snippet lines, and HTML blocks)
                /// A run of consecutive list-item lines that is handled as a single unit.
                ///
                /// The grouping loop below converts the flat `list_item_lines` sequence into a
                /// `Vec<Block>`, flushing the block in progress whenever the line type changes.
                #[derive(Clone)]
                enum Block {
                    /// Consecutive plain content lines; a blank line or a different line type ends it.
                    Paragraph(Vec<String>),
                    /// Code lines (fence markers, lines inside a code block, or deeply indented lines).
                    Code {
                        lines: Vec<(String, usize)>, // (content, indent) pairs
                        has_preceding_blank: bool,   // Whether there was a blank line before this block
                    },
                    /// Nested list item lines, including blank lines seen while the nested list is open.
                    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
                    /// A single semantic marker line; always emitted as its own block.
                    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
                    /// A single MkDocs Snippets delimiter line; always emitted as its own block.
                    SnippetLine(String),  // MkDocs Snippets delimiter that stays on its own line without extra spacing
                    /// Lines from a block-level HTML opening tag (or comment) up to the line that
                    /// empties the open-tag stack; blank lines inside are kept.
                    Html {
                        lines: Vec<String>,        // HTML content preserved exactly as-is
                        has_preceding_blank: bool, // Whether there was a blank line before this block
                    },
                }
801
802                // HTML tag detection helpers
803                // Block-level HTML tags that should trigger HTML block detection
804                const BLOCK_LEVEL_TAGS: &[&str] = &[
805                    "div",
806                    "details",
807                    "summary",
808                    "section",
809                    "article",
810                    "header",
811                    "footer",
812                    "nav",
813                    "aside",
814                    "main",
815                    "table",
816                    "thead",
817                    "tbody",
818                    "tfoot",
819                    "tr",
820                    "td",
821                    "th",
822                    "ul",
823                    "ol",
824                    "li",
825                    "dl",
826                    "dt",
827                    "dd",
828                    "pre",
829                    "blockquote",
830                    "figure",
831                    "figcaption",
832                    "form",
833                    "fieldset",
834                    "legend",
835                    "hr",
836                    "p",
837                    "h1",
838                    "h2",
839                    "h3",
840                    "h4",
841                    "h5",
842                    "h6",
843                    "style",
844                    "script",
845                    "noscript",
846                ];
847
848                fn is_block_html_opening_tag(line: &str) -> Option<String> {
849                    let trimmed = line.trim();
850
851                    // Check for HTML comments
852                    if trimmed.starts_with("<!--") {
853                        return Some("!--".to_string());
854                    }
855
856                    // Check for opening tags
857                    if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
858                        // Extract tag name from <tagname ...> or <tagname>
859                        let after_bracket = &trimmed[1..];
860                        if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
861                            let tag_name = after_bracket[..end].to_lowercase();
862
863                            // Only treat as block if it's a known block-level tag
864                            if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
865                                return Some(tag_name);
866                            }
867                        }
868                    }
869                    None
870                }
871
872                fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
873                    let trimmed = line.trim();
874
875                    // Special handling for HTML comments
876                    if tag_name == "!--" {
877                        return trimmed.ends_with("-->");
878                    }
879
880                    // Check for closing tags: </tagname> or </tagname ...>
881                    trimmed.starts_with(&format!("</{tag_name}>"))
882                        || trimmed.starts_with(&format!("</{tag_name}  "))
883                        || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
884                }
885
886                fn is_self_closing_tag(line: &str) -> bool {
887                    let trimmed = line.trim();
888                    trimmed.ends_with("/>")
889                }
890
891                let mut blocks: Vec<Block> = Vec::new();
892                let mut current_paragraph: Vec<String> = Vec::new();
893                let mut current_code_block: Vec<(String, usize)> = Vec::new();
894                let mut current_nested_list: Vec<(String, usize)> = Vec::new();
895                let mut current_html_block: Vec<String> = Vec::new();
896                let mut html_tag_stack: Vec<String> = Vec::new();
897                let mut in_code = false;
898                let mut in_nested_list = false;
899                let mut in_html_block = false;
900                let mut had_preceding_blank = false; // Track if we just saw an empty line
901                let mut code_block_has_preceding_blank = false; // Track blank before current code block
902                let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
903
904                for line in &list_item_lines {
905                    match line {
906                        LineType::Empty => {
907                            if in_code {
908                                current_code_block.push((String::new(), 0));
909                            } else if in_nested_list {
910                                current_nested_list.push((String::new(), 0));
911                            } else if in_html_block {
912                                // Allow blank lines inside HTML blocks
913                                current_html_block.push(String::new());
914                            } else if !current_paragraph.is_empty() {
915                                blocks.push(Block::Paragraph(current_paragraph.clone()));
916                                current_paragraph.clear();
917                            }
918                            // Mark that we saw a blank line
919                            had_preceding_blank = true;
920                        }
921                        LineType::Content(content) => {
922                            // Check if we're currently in an HTML block
923                            if in_html_block {
924                                current_html_block.push(content.clone());
925
926                                // Check if this line closes any open HTML tags
927                                if let Some(last_tag) = html_tag_stack.last() {
928                                    if is_html_closing_tag(content, last_tag) {
929                                        html_tag_stack.pop();
930
931                                        // If stack is empty, HTML block is complete
932                                        if html_tag_stack.is_empty() {
933                                            blocks.push(Block::Html {
934                                                lines: current_html_block.clone(),
935                                                has_preceding_blank: html_block_has_preceding_blank,
936                                            });
937                                            current_html_block.clear();
938                                            in_html_block = false;
939                                        }
940                                    } else if let Some(new_tag) = is_block_html_opening_tag(content) {
941                                        // Nested opening tag within HTML block
942                                        if !is_self_closing_tag(content) {
943                                            html_tag_stack.push(new_tag);
944                                        }
945                                    }
946                                }
947                                had_preceding_blank = false;
948                            } else {
949                                // Not in HTML block - check if this line starts one
950                                if let Some(tag_name) = is_block_html_opening_tag(content) {
951                                    // Flush current paragraph before starting HTML block
952                                    if in_code {
953                                        blocks.push(Block::Code {
954                                            lines: current_code_block.clone(),
955                                            has_preceding_blank: code_block_has_preceding_blank,
956                                        });
957                                        current_code_block.clear();
958                                        in_code = false;
959                                    } else if in_nested_list {
960                                        blocks.push(Block::NestedList(current_nested_list.clone()));
961                                        current_nested_list.clear();
962                                        in_nested_list = false;
963                                    } else if !current_paragraph.is_empty() {
964                                        blocks.push(Block::Paragraph(current_paragraph.clone()));
965                                        current_paragraph.clear();
966                                    }
967
968                                    // Start new HTML block
969                                    in_html_block = true;
970                                    html_block_has_preceding_blank = had_preceding_blank;
971                                    current_html_block.push(content.clone());
972
973                                    // Check if it's self-closing or needs a closing tag
974                                    if is_self_closing_tag(content) {
975                                        // Self-closing tag - complete the HTML block immediately
976                                        blocks.push(Block::Html {
977                                            lines: current_html_block.clone(),
978                                            has_preceding_blank: html_block_has_preceding_blank,
979                                        });
980                                        current_html_block.clear();
981                                        in_html_block = false;
982                                    } else {
983                                        // Regular opening tag - push to stack
984                                        html_tag_stack.push(tag_name);
985                                    }
986                                } else {
987                                    // Regular content line - add to paragraph
988                                    if in_code {
989                                        // Switching from code to content
990                                        blocks.push(Block::Code {
991                                            lines: current_code_block.clone(),
992                                            has_preceding_blank: code_block_has_preceding_blank,
993                                        });
994                                        current_code_block.clear();
995                                        in_code = false;
996                                    } else if in_nested_list {
997                                        // Switching from nested list to content
998                                        blocks.push(Block::NestedList(current_nested_list.clone()));
999                                        current_nested_list.clear();
1000                                        in_nested_list = false;
1001                                    }
1002                                    current_paragraph.push(content.clone());
1003                                }
1004                                had_preceding_blank = false; // Reset after content
1005                            }
1006                        }
1007                        LineType::CodeBlock(content, indent) => {
1008                            if in_nested_list {
1009                                // Switching from nested list to code
1010                                blocks.push(Block::NestedList(current_nested_list.clone()));
1011                                current_nested_list.clear();
1012                                in_nested_list = false;
1013                            } else if in_html_block {
1014                                // Switching from HTML block to code (shouldn't happen normally, but handle it)
1015                                blocks.push(Block::Html {
1016                                    lines: current_html_block.clone(),
1017                                    has_preceding_blank: html_block_has_preceding_blank,
1018                                });
1019                                current_html_block.clear();
1020                                html_tag_stack.clear();
1021                                in_html_block = false;
1022                            }
1023                            if !in_code {
1024                                // Switching from content to code
1025                                if !current_paragraph.is_empty() {
1026                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
1027                                    current_paragraph.clear();
1028                                }
1029                                in_code = true;
1030                                // Record whether there was a blank line before this code block
1031                                code_block_has_preceding_blank = had_preceding_blank;
1032                            }
1033                            current_code_block.push((content.clone(), *indent));
1034                            had_preceding_blank = false; // Reset after code
1035                        }
1036                        LineType::NestedListItem(content, indent) => {
1037                            if in_code {
1038                                // Switching from code to nested list
1039                                blocks.push(Block::Code {
1040                                    lines: current_code_block.clone(),
1041                                    has_preceding_blank: code_block_has_preceding_blank,
1042                                });
1043                                current_code_block.clear();
1044                                in_code = false;
1045                            } else if in_html_block {
1046                                // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1047                                blocks.push(Block::Html {
1048                                    lines: current_html_block.clone(),
1049                                    has_preceding_blank: html_block_has_preceding_blank,
1050                                });
1051                                current_html_block.clear();
1052                                html_tag_stack.clear();
1053                                in_html_block = false;
1054                            }
1055                            if !in_nested_list {
1056                                // Switching from content to nested list
1057                                if !current_paragraph.is_empty() {
1058                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
1059                                    current_paragraph.clear();
1060                                }
1061                                in_nested_list = true;
1062                            }
1063                            current_nested_list.push((content.clone(), *indent));
1064                            had_preceding_blank = false; // Reset after nested list
1065                        }
1066                        LineType::SemanticLine(content) => {
1067                            // Semantic lines are standalone - flush any current block and add as separate block
1068                            if in_code {
1069                                blocks.push(Block::Code {
1070                                    lines: current_code_block.clone(),
1071                                    has_preceding_blank: code_block_has_preceding_blank,
1072                                });
1073                                current_code_block.clear();
1074                                in_code = false;
1075                            } else if in_nested_list {
1076                                blocks.push(Block::NestedList(current_nested_list.clone()));
1077                                current_nested_list.clear();
1078                                in_nested_list = false;
1079                            } else if in_html_block {
1080                                blocks.push(Block::Html {
1081                                    lines: current_html_block.clone(),
1082                                    has_preceding_blank: html_block_has_preceding_blank,
1083                                });
1084                                current_html_block.clear();
1085                                html_tag_stack.clear();
1086                                in_html_block = false;
1087                            } else if !current_paragraph.is_empty() {
1088                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1089                                current_paragraph.clear();
1090                            }
1091                            // Add semantic line as its own block
1092                            blocks.push(Block::SemanticLine(content.clone()));
1093                            had_preceding_blank = false; // Reset after semantic line
1094                        }
1095                        LineType::SnippetLine(content) => {
1096                            // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1097                            // Unlike semantic lines, snippet lines don't add extra blank lines around them
1098                            if in_code {
1099                                blocks.push(Block::Code {
1100                                    lines: current_code_block.clone(),
1101                                    has_preceding_blank: code_block_has_preceding_blank,
1102                                });
1103                                current_code_block.clear();
1104                                in_code = false;
1105                            } else if in_nested_list {
1106                                blocks.push(Block::NestedList(current_nested_list.clone()));
1107                                current_nested_list.clear();
1108                                in_nested_list = false;
1109                            } else if in_html_block {
1110                                blocks.push(Block::Html {
1111                                    lines: current_html_block.clone(),
1112                                    has_preceding_blank: html_block_has_preceding_blank,
1113                                });
1114                                current_html_block.clear();
1115                                html_tag_stack.clear();
1116                                in_html_block = false;
1117                            } else if !current_paragraph.is_empty() {
1118                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1119                                current_paragraph.clear();
1120                            }
1121                            // Add snippet line as its own block
1122                            blocks.push(Block::SnippetLine(content.clone()));
1123                            had_preceding_blank = false;
1124                        }
1125                    }
1126                }
1127
1128                // Push remaining block
1129                if in_code && !current_code_block.is_empty() {
1130                    blocks.push(Block::Code {
1131                        lines: current_code_block,
1132                        has_preceding_blank: code_block_has_preceding_blank,
1133                    });
1134                } else if in_nested_list && !current_nested_list.is_empty() {
1135                    blocks.push(Block::NestedList(current_nested_list));
1136                } else if in_html_block && !current_html_block.is_empty() {
1137                    // If we still have an unclosed HTML block, push it anyway
1138                    // (malformed HTML - missing closing tag)
1139                    blocks.push(Block::Html {
1140                        lines: current_html_block,
1141                        has_preceding_blank: html_block_has_preceding_blank,
1142                    });
1143                } else if !current_paragraph.is_empty() {
1144                    blocks.push(Block::Paragraph(current_paragraph));
1145                }
1146
1147                // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1148                let content_lines: Vec<String> = list_item_lines
1149                    .iter()
1150                    .filter_map(|line| {
1151                        if let LineType::Content(s) = line {
1152                            Some(s.clone())
1153                        } else {
1154                            None
1155                        }
1156                    })
1157                    .collect();
1158
1159                // Check if we need to reflow this list item
1160                // We check the combined content to see if it exceeds length limits
1161                let combined_content = content_lines.join(" ").trim().to_string();
1162                let full_line = format!("{marker}{combined_content}");
1163
1164                // Helper to check if we should reflow in normalize mode
1165                let should_normalize = || {
1166                    // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1167                    // DO normalize if it has plain text content that spans multiple lines
1168                    let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1169                    let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1170                    let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1171                    let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1172                    let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1173
1174                    // If we have nested lists, code blocks, semantic lines, or snippet lines but no paragraphs, don't normalize
1175                    if (has_nested_lists || has_code_blocks || has_semantic_lines || has_snippet_lines)
1176                        && !has_paragraphs
1177                    {
1178                        return false;
1179                    }
1180
1181                    // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1182                    if has_paragraphs {
1183                        let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1184                        if paragraph_count > 1 {
1185                            // Multiple paragraph blocks should be normalized
1186                            return true;
1187                        }
1188
1189                        // Single paragraph block: normalize if it has multiple content lines
1190                        if content_lines.len() > 1 {
1191                            return true;
1192                        }
1193                    }
1194
1195                    false
1196                };
1197
1198                let needs_reflow = match config.reflow_mode {
1199                    ReflowMode::Normalize => {
1200                        // Only reflow if:
1201                        // 1. The combined line would exceed the limit, OR
1202                        // 2. The list item should be normalized (has multi-line plain text)
1203                        let combined_length = self.calculate_effective_length(&full_line);
1204                        if combined_length > config.line_length.get() {
1205                            true
1206                        } else {
1207                            should_normalize()
1208                        }
1209                    }
1210                    ReflowMode::SentencePerLine => {
1211                        // Check if list item has multiple sentences
1212                        let sentences = split_into_sentences(&combined_content);
1213                        sentences.len() > 1
1214                    }
1215                    ReflowMode::Default => {
1216                        // In default mode, only reflow if any individual line exceeds limit
1217                        // Check the original lines, not the combined content
1218                        (list_start..i)
1219                            .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1220                    }
1221                };
1222
1223                if needs_reflow {
1224                    let start_range = line_index.whole_line_range(list_start + 1);
1225                    let end_line = i - 1;
1226                    let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1227                        line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1228                    } else {
1229                        line_index.whole_line_range(end_line + 1)
1230                    };
1231                    let byte_range = start_range.start..end_range.end;
1232
1233                    // Reflow each block (paragraphs only, preserve code blocks)
1234                    // When line_length = 0 (no limit), use a very large value for reflow
1235                    let reflow_line_length = if config.line_length.is_unlimited() {
1236                        usize::MAX
1237                    } else {
1238                        config.line_length.get().saturating_sub(indent_size).max(1)
1239                    };
1240                    let reflow_options = crate::utils::text_reflow::ReflowOptions {
1241                        line_length: reflow_line_length,
1242                        break_on_sentences: true,
1243                        preserve_breaks: false,
1244                        sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1245                        abbreviations: config.abbreviations_for_reflow(),
1246                    };
1247
1248                    let mut result: Vec<String> = Vec::new();
1249                    let mut is_first_block = true;
1250
1251                    for (block_idx, block) in blocks.iter().enumerate() {
1252                        match block {
1253                            Block::Paragraph(para_lines) => {
1254                                // Split the paragraph into segments at hard break boundaries
1255                                // Each segment can be reflowed independently
1256                                let segments = split_into_segments(para_lines);
1257
1258                                for (segment_idx, segment) in segments.iter().enumerate() {
1259                                    // Check if this segment ends with a hard break and what type
1260                                    let hard_break_type = segment.last().and_then(|line| {
1261                                        let line = line.strip_suffix('\r').unwrap_or(line);
1262                                        if line.ends_with('\\') {
1263                                            Some("\\")
1264                                        } else if line.ends_with("  ") {
1265                                            Some("  ")
1266                                        } else {
1267                                            None
1268                                        }
1269                                    });
1270
1271                                    // Join and reflow the segment (removing the hard break marker for processing)
1272                                    let segment_for_reflow: Vec<String> = segment
1273                                        .iter()
1274                                        .map(|line| {
1275                                            // Strip hard break marker (2 spaces or backslash) for reflow processing
1276                                            if line.ends_with('\\') {
1277                                                line[..line.len() - 1].trim_end().to_string()
1278                                            } else if line.ends_with("  ") {
1279                                                line[..line.len() - 2].trim_end().to_string()
1280                                            } else {
1281                                                line.clone()
1282                                            }
1283                                        })
1284                                        .collect();
1285
1286                                    let segment_text = segment_for_reflow.join(" ").trim().to_string();
1287                                    if !segment_text.is_empty() {
1288                                        let reflowed =
1289                                            crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1290
1291                                        if is_first_block && segment_idx == 0 {
1292                                            // First segment of first block starts with marker
1293                                            result.push(format!("{marker}{}", reflowed[0]));
1294                                            for line in reflowed.iter().skip(1) {
1295                                                result.push(format!("{expected_indent}{line}"));
1296                                            }
1297                                            is_first_block = false;
1298                                        } else {
1299                                            // Subsequent segments
1300                                            for line in reflowed {
1301                                                result.push(format!("{expected_indent}{line}"));
1302                                            }
1303                                        }
1304
1305                                        // If this segment had a hard break, add it back to the last line
1306                                        // Preserve the original hard break format (backslash or two spaces)
1307                                        if let Some(break_marker) = hard_break_type
1308                                            && let Some(last_line) = result.last_mut()
1309                                        {
1310                                            last_line.push_str(break_marker);
1311                                        }
1312                                    }
1313                                }
1314
1315                                // Add blank line after paragraph block if there's a next block
1316                                // BUT: check if next block is a code block that doesn't want a preceding blank
1317                                // Also don't add blank lines before snippet lines (they should stay tight)
1318                                if block_idx < blocks.len() - 1 {
1319                                    let next_block = &blocks[block_idx + 1];
1320                                    let should_add_blank = match next_block {
1321                                        Block::Code {
1322                                            has_preceding_blank, ..
1323                                        } => *has_preceding_blank,
1324                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1325                                        _ => true,                      // For all other blocks, add blank line
1326                                    };
1327                                    if should_add_blank {
1328                                        result.push(String::new());
1329                                    }
1330                                }
1331                            }
1332                            Block::Code {
1333                                lines: code_lines,
1334                                has_preceding_blank: _,
1335                            } => {
1336                                // Preserve code blocks as-is with original indentation
1337                                // NOTE: Blank line before code block is handled by the previous block
1338                                // (see paragraph block's logic above)
1339
1340                                for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1341                                    if is_first_block && idx == 0 {
1342                                        // First line of first block gets marker
1343                                        result.push(format!(
1344                                            "{marker}{}",
1345                                            " ".repeat(orig_indent - marker_len) + content
1346                                        ));
1347                                        is_first_block = false;
1348                                    } else if content.is_empty() {
1349                                        result.push(String::new());
1350                                    } else {
1351                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1352                                    }
1353                                }
1354                            }
1355                            Block::NestedList(nested_items) => {
1356                                // Preserve nested list items as-is with original indentation
1357                                if !is_first_block {
1358                                    result.push(String::new());
1359                                }
1360
1361                                for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1362                                    if is_first_block && idx == 0 {
1363                                        // First line of first block gets marker
1364                                        result.push(format!(
1365                                            "{marker}{}",
1366                                            " ".repeat(orig_indent - marker_len) + content
1367                                        ));
1368                                        is_first_block = false;
1369                                    } else if content.is_empty() {
1370                                        result.push(String::new());
1371                                    } else {
1372                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1373                                    }
1374                                }
1375
1376                                // Add blank line after nested list if there's a next block
1377                                // Check if next block is a code block that doesn't want a preceding blank
1378                                if block_idx < blocks.len() - 1 {
1379                                    let next_block = &blocks[block_idx + 1];
1380                                    let should_add_blank = match next_block {
1381                                        Block::Code {
1382                                            has_preceding_blank, ..
1383                                        } => *has_preceding_blank,
1384                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1385                                        _ => true,                      // For all other blocks, add blank line
1386                                    };
1387                                    if should_add_blank {
1388                                        result.push(String::new());
1389                                    }
1390                                }
1391                            }
1392                            Block::SemanticLine(content) => {
1393                                // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1394                                // Add blank line before if not first block
1395                                if !is_first_block {
1396                                    result.push(String::new());
1397                                }
1398
1399                                if is_first_block {
1400                                    // First block starts with marker
1401                                    result.push(format!("{marker}{content}"));
1402                                    is_first_block = false;
1403                                } else {
1404                                    // Subsequent blocks use expected indent
1405                                    result.push(format!("{expected_indent}{content}"));
1406                                }
1407
1408                                // Add blank line after semantic line if there's a next block
1409                                // Check if next block is a code block that doesn't want a preceding blank
1410                                if block_idx < blocks.len() - 1 {
1411                                    let next_block = &blocks[block_idx + 1];
1412                                    let should_add_blank = match next_block {
1413                                        Block::Code {
1414                                            has_preceding_blank, ..
1415                                        } => *has_preceding_blank,
1416                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1417                                        _ => true,                      // For all other blocks, add blank line
1418                                    };
1419                                    if should_add_blank {
1420                                        result.push(String::new());
1421                                    }
1422                                }
1423                            }
1424                            Block::SnippetLine(content) => {
1425                                // Preserve snippet delimiters (-8<-) as-is on their own line
1426                                // Unlike semantic lines, snippet lines don't add extra blank lines
1427                                if is_first_block {
1428                                    // First block starts with marker
1429                                    result.push(format!("{marker}{content}"));
1430                                    is_first_block = false;
1431                                } else {
1432                                    // Subsequent blocks use expected indent
1433                                    result.push(format!("{expected_indent}{content}"));
1434                                }
1435                                // No blank lines added before or after snippet delimiters
1436                            }
1437                            Block::Html {
1438                                lines: html_lines,
1439                                has_preceding_blank: _,
1440                            } => {
1441                                // Preserve HTML blocks exactly as-is with original indentation
1442                                // NOTE: Blank line before HTML block is handled by the previous block
1443
1444                                for (idx, line) in html_lines.iter().enumerate() {
1445                                    if is_first_block && idx == 0 {
1446                                        // First line of first block gets marker
1447                                        result.push(format!("{marker}{line}"));
1448                                        is_first_block = false;
1449                                    } else if line.is_empty() {
1450                                        // Preserve blank lines inside HTML blocks
1451                                        result.push(String::new());
1452                                    } else {
1453                                        // Preserve lines with their original content (already includes indentation)
1454                                        result.push(format!("{expected_indent}{line}"));
1455                                    }
1456                                }
1457
1458                                // Add blank line after HTML block if there's a next block
1459                                if block_idx < blocks.len() - 1 {
1460                                    let next_block = &blocks[block_idx + 1];
1461                                    let should_add_blank = match next_block {
1462                                        Block::Code {
1463                                            has_preceding_blank, ..
1464                                        } => *has_preceding_blank,
1465                                        Block::Html {
1466                                            has_preceding_blank, ..
1467                                        } => *has_preceding_blank,
1468                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1469                                        _ => true,                      // For all other blocks, add blank line
1470                                    };
1471                                    if should_add_blank {
1472                                        result.push(String::new());
1473                                    }
1474                                }
1475                            }
1476                        }
1477                    }
1478
1479                    let reflowed_text = result.join("\n");
1480
1481                    // Preserve trailing newline
1482                    let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1483                        format!("{reflowed_text}\n")
1484                    } else {
1485                        reflowed_text
1486                    };
1487
1488                    // Get the original text to compare
1489                    let original_text = &ctx.content[byte_range.clone()];
1490
1491                    // Only generate a warning if the replacement is different from the original
1492                    if original_text != replacement {
1493                        // Generate an appropriate message based on why reflow is needed
1494                        let message = match config.reflow_mode {
1495                            ReflowMode::SentencePerLine => {
1496                                let num_sentences = split_into_sentences(&combined_content).len();
1497                                let num_lines = content_lines.len();
1498                                if num_lines == 1 {
1499                                    // Single line with multiple sentences
1500                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1501                                } else {
1502                                    // Multiple lines - could be split sentences or mixed
1503                                    format!(
1504                                        "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1505                                    )
1506                                }
1507                            }
1508                            ReflowMode::Normalize => {
1509                                let combined_length = self.calculate_effective_length(&full_line);
1510                                if combined_length > config.line_length.get() {
1511                                    format!(
1512                                        "Line length {} exceeds {} characters",
1513                                        combined_length,
1514                                        config.line_length.get()
1515                                    )
1516                                } else {
1517                                    "Multi-line content can be normalized".to_string()
1518                                }
1519                            }
1520                            ReflowMode::Default => {
1521                                let combined_length = self.calculate_effective_length(&full_line);
1522                                format!(
1523                                    "Line length {} exceeds {} characters",
1524                                    combined_length,
1525                                    config.line_length.get()
1526                                )
1527                            }
1528                        };
1529
1530                        warnings.push(LintWarning {
1531                            rule_name: Some(self.name().to_string()),
1532                            message,
1533                            line: list_start + 1,
1534                            column: 1,
1535                            end_line: end_line + 1,
1536                            end_column: lines[end_line].len() + 1,
1537                            severity: Severity::Warning,
1538                            fix: Some(crate::rule::Fix {
1539                                range: byte_range,
1540                                replacement,
1541                            }),
1542                        });
1543                    }
1544                }
1545                continue;
1546            }
1547
1548            // Found start of a paragraph - collect all lines in it
1549            let paragraph_start = i;
1550            let mut paragraph_lines = vec![lines[i]];
1551            i += 1;
1552
1553            while i < lines.len() {
1554                let next_line = lines[i];
1555                let next_line_num = i + 1;
1556                let next_trimmed = next_line.trim();
1557
1558                // Stop at paragraph boundaries
1559                if next_trimmed.is_empty()
1560                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1561                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1562                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1563                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1564                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1565                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1566                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1567                    || ctx
1568                        .line_info(next_line_num)
1569                        .is_some_and(|info| info.in_mkdocs_container())
1570                    || (next_line_num > 0
1571                        && next_line_num <= ctx.lines.len()
1572                        && ctx.lines[next_line_num - 1].blockquote.is_some())
1573                    || next_trimmed.starts_with('#')
1574                    || TableUtils::is_potential_table_row(next_line)
1575                    || is_list_item(next_trimmed)
1576                    || is_horizontal_rule(next_trimmed)
1577                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1578                    || is_template_directive_only(next_line)
1579                    || is_standalone_attr_list(next_line)
1580                    || is_snippet_block_delimiter(next_line)
1581                {
1582                    break;
1583                }
1584
1585                // Check if the previous line ends with a hard break (2+ spaces or backslash)
1586                if i > 0 && has_hard_break(lines[i - 1]) {
1587                    // Don't include lines after hard breaks in the same paragraph
1588                    break;
1589                }
1590
1591                paragraph_lines.push(next_line);
1592                i += 1;
1593            }
1594
1595            // Combine paragraph lines into a single string for processing
1596            // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1597            let paragraph_text = paragraph_lines.join(" ");
1598
1599            // Skip reflowing if this paragraph contains definition list items
1600            // Definition lists are multi-line structures that should not be joined
1601            let contains_definition_list = paragraph_lines
1602                .iter()
1603                .any(|line| crate::utils::is_definition_list_item(line));
1604
1605            if contains_definition_list {
1606                // Don't reflow definition lists - skip this paragraph
1607                i = paragraph_start + paragraph_lines.len();
1608                continue;
1609            }
1610
1611            // Skip reflowing if this paragraph contains MkDocs Snippets markers
1612            // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1613            let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1614
1615            if contains_snippets {
1616                // Don't reflow Snippets blocks - skip this paragraph
1617                i = paragraph_start + paragraph_lines.len();
1618                continue;
1619            }
1620
1621            // Check if this paragraph needs reflowing
1622            let needs_reflow = match config.reflow_mode {
1623                ReflowMode::Normalize => {
1624                    // In normalize mode, reflow multi-line paragraphs
1625                    paragraph_lines.len() > 1
1626                }
1627                ReflowMode::SentencePerLine => {
1628                    // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1629                    // Note: we check the joined text because sentences can span multiple lines
1630                    let sentences = split_into_sentences(&paragraph_text);
1631
1632                    // Always reflow if multiple sentences on one line
1633                    if sentences.len() > 1 {
1634                        true
1635                    } else if paragraph_lines.len() > 1 {
1636                        // For single-sentence paragraphs spanning multiple lines:
1637                        // Reflow if they COULD fit on one line (respecting line-length constraint)
1638                        if config.line_length.is_unlimited() {
1639                            // No line-length constraint - always join single sentences
1640                            true
1641                        } else {
1642                            // Only join if it fits within line-length
1643                            let effective_length = self.calculate_effective_length(&paragraph_text);
1644                            effective_length <= config.line_length.get()
1645                        }
1646                    } else {
1647                        false
1648                    }
1649                }
1650                ReflowMode::Default => {
1651                    // In default mode, only reflow if lines exceed limit
1652                    paragraph_lines
1653                        .iter()
1654                        .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1655                }
1656            };
1657
1658            if needs_reflow {
1659                // Calculate byte range for this paragraph
1660                // Use whole_line_range for each line and combine
1661                let start_range = line_index.whole_line_range(paragraph_start + 1);
1662                let end_line = paragraph_start + paragraph_lines.len() - 1;
1663
1664                // For the last line, we want to preserve any trailing newline
1665                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1666                    // Last line without trailing newline - use line_text_range
1667                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1668                } else {
1669                    // Not the last line or has trailing newline - use whole_line_range
1670                    line_index.whole_line_range(end_line + 1)
1671                };
1672
1673                let byte_range = start_range.start..end_range.end;
1674
1675                // Check if the paragraph ends with a hard break and what type
1676                let hard_break_type = paragraph_lines.last().and_then(|line| {
1677                    let line = line.strip_suffix('\r').unwrap_or(line);
1678                    if line.ends_with('\\') {
1679                        Some("\\")
1680                    } else if line.ends_with("  ") {
1681                        Some("  ")
1682                    } else {
1683                        None
1684                    }
1685                });
1686
1687                // Reflow the paragraph
1688                // When line_length = 0 (no limit), use a very large value for reflow
1689                let reflow_line_length = if config.line_length.is_unlimited() {
1690                    usize::MAX
1691                } else {
1692                    config.line_length.get()
1693                };
1694                let reflow_options = crate::utils::text_reflow::ReflowOptions {
1695                    line_length: reflow_line_length,
1696                    break_on_sentences: true,
1697                    preserve_breaks: false,
1698                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1699                    abbreviations: config.abbreviations_for_reflow(),
1700                };
1701                let mut reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
1702
1703                // If the original paragraph ended with a hard break, preserve it
1704                // Preserve the original hard break format (backslash or two spaces)
1705                if let Some(break_marker) = hard_break_type
1706                    && !reflowed.is_empty()
1707                {
1708                    let last_idx = reflowed.len() - 1;
1709                    if !has_hard_break(&reflowed[last_idx]) {
1710                        reflowed[last_idx].push_str(break_marker);
1711                    }
1712                }
1713
1714                let reflowed_text = reflowed.join("\n");
1715
1716                // Preserve trailing newline if the original paragraph had one
1717                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1718                    format!("{reflowed_text}\n")
1719                } else {
1720                    reflowed_text
1721                };
1722
1723                // Get the original text to compare
1724                let original_text = &ctx.content[byte_range.clone()];
1725
1726                // Only generate a warning if the replacement is different from the original
1727                if original_text != replacement {
1728                    // Create warning with actual fix
1729                    // In default mode, report the specific line that violates
1730                    // In normalize mode, report the whole paragraph
1731                    // In sentence-per-line mode, report the entire paragraph
1732                    let (warning_line, warning_end_line) = match config.reflow_mode {
1733                        ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1734                        ReflowMode::SentencePerLine => {
1735                            // Highlight the entire paragraph that needs reformatting
1736                            (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1737                        }
1738                        ReflowMode::Default => {
1739                            // Find the first line that exceeds the limit
1740                            let mut violating_line = paragraph_start;
1741                            for (idx, line) in paragraph_lines.iter().enumerate() {
1742                                if self.calculate_effective_length(line) > config.line_length.get() {
1743                                    violating_line = paragraph_start + idx;
1744                                    break;
1745                                }
1746                            }
1747                            (violating_line + 1, violating_line + 1)
1748                        }
1749                    };
1750
1751                    warnings.push(LintWarning {
1752                        rule_name: Some(self.name().to_string()),
1753                        message: match config.reflow_mode {
1754                            ReflowMode::Normalize => format!(
1755                                "Paragraph could be normalized to use line length of {} characters",
1756                                config.line_length.get()
1757                            ),
1758                            ReflowMode::SentencePerLine => {
1759                                let num_sentences = split_into_sentences(&paragraph_text).len();
1760                                if paragraph_lines.len() == 1 {
1761                                    // Single line with multiple sentences
1762                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1763                                } else {
1764                                    let num_lines = paragraph_lines.len();
1765                                    // Multiple lines - could be split sentences or mixed
1766                                    format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1767                                }
1768                            },
1769                            ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1770                        },
1771                        line: warning_line,
1772                        column: 1,
1773                        end_line: warning_end_line,
1774                        end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1775                        severity: Severity::Warning,
1776                        fix: Some(crate::rule::Fix {
1777                            range: byte_range,
1778                            replacement,
1779                        }),
1780                    });
1781                }
1782            }
1783        }
1784
1785        warnings
1786    }
1787
1788    /// Calculate string length based on the configured length mode
1789    fn calculate_string_length(&self, s: &str) -> usize {
1790        match self.config.length_mode {
1791            LengthMode::Chars => s.chars().count(),
1792            LengthMode::Visual => s.width(),
1793            LengthMode::Bytes => s.len(),
1794        }
1795    }
1796
1797    /// Calculate effective line length excluding unbreakable URLs
1798    fn calculate_effective_length(&self, line: &str) -> usize {
1799        if self.config.strict {
1800            // In strict mode, count everything
1801            return self.calculate_string_length(line);
1802        }
1803
1804        // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1805        let bytes = line.as_bytes();
1806        if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1807            return self.calculate_string_length(line);
1808        }
1809
1810        // More precise check for URLs and links
1811        if !line.contains("http") && !line.contains('[') {
1812            return self.calculate_string_length(line);
1813        }
1814
1815        let mut effective_line = line.to_string();
1816
1817        // First handle markdown links to avoid double-counting URLs
1818        // Pattern: [text](very-long-url) -> [text](url)
1819        if line.contains('[') && line.contains("](") {
1820            for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1821                if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1822                    && url.as_str().len() > 15
1823                {
1824                    let replacement = format!("[{}](url)", text.as_str());
1825                    effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1826                }
1827            }
1828        }
1829
1830        // Then replace bare URLs with a placeholder of reasonable length
1831        // This allows lines with long URLs to pass if the rest of the content is reasonable
1832        if effective_line.contains("http") {
1833            for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1834                let url = url_match.as_str();
1835                // Skip if this URL is already part of a markdown link we handled
1836                if !effective_line.contains(&format!("({url})")) {
1837                    // Replace URL with placeholder that represents a "reasonable" URL length
1838                    // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1839                    let placeholder = "x".repeat(15.min(url.len()));
1840                    effective_line = effective_line.replacen(url, &placeholder, 1);
1841                }
1842            }
1843        }
1844
1845        self.calculate_string_length(&effective_line)
1846    }
1847}
rumdl_lib/rules/md013_line_length/mod.rs

rumdl_lib/rules/md013_line_length/
mod.rs