Skip to main content

rumdl_lib/rules/md013_line_length/
mod.rs

1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
7use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
8use crate::utils::range_utils::LineIndex;
9use crate::utils::range_utils::calculate_excess_range;
10use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18    extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19    split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
28#[derive(Clone, Default)]
29pub struct MD013LineLength {
30    pub(crate) config: MD013Config,
31}
32
33impl MD013LineLength {
34    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35        Self {
36            config: MD013Config {
37                line_length: crate::types::LineLength::new(line_length),
38                code_blocks,
39                tables,
40                headings,
41                paragraphs: true, // Default to true for backwards compatibility
42                strict,
43                reflow: false,
44                reflow_mode: ReflowMode::default(),
45                length_mode: LengthMode::default(),
46                abbreviations: Vec::new(),
47            },
48        }
49    }
50
51    pub fn from_config_struct(config: MD013Config) -> Self {
52        Self { config }
53    }
54
55    fn should_ignore_line(
56        &self,
57        line: &str,
58        _lines: &[&str],
59        current_line: usize,
60        ctx: &crate::lint_context::LintContext,
61    ) -> bool {
62        if self.config.strict {
63            return false;
64        }
65
66        // Quick check for common patterns before expensive regex
67        let trimmed = line.trim();
68
69        // Only skip if the entire line is a URL (quick check first)
70        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71            return true;
72        }
73
74        // Only skip if the entire line is an image reference (quick check first)
75        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76            return true;
77        }
78
79        // Only skip if the entire line is a link reference (quick check first)
80        if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81            return true;
82        }
83
84        // Code blocks with long strings (only check if in code block)
85        if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86            && !trimmed.is_empty()
87            && !line.contains(' ')
88            && !line.contains('\t')
89        {
90            return true;
91        }
92
93        false
94    }
95
96    /// Check if rule should skip based on provided config (used for inline config support)
97    fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
98        // Skip if content is empty
99        if ctx.content.is_empty() {
100            return true;
101        }
102
103        // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
104        if config.reflow
105            && (config.reflow_mode == ReflowMode::SentencePerLine
106                || config.reflow_mode == ReflowMode::SemanticLineBreaks
107                || config.reflow_mode == ReflowMode::Normalize)
108        {
109            return false;
110        }
111
112        // Quick check: if total content is shorter than line limit, definitely skip
113        if ctx.content.len() <= config.line_length.get() {
114            return true;
115        }
116
117        // Skip if no line exceeds the limit
118        !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
119    }
120}
121
122impl Rule for MD013LineLength {
123    fn name(&self) -> &'static str {
124        "MD013"
125    }
126
127    fn description(&self) -> &'static str {
128        "Line length should not be excessive"
129    }
130
131    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
132        // Use pre-parsed inline config from LintContext
133        let config_override = ctx.inline_config().get_rule_config("MD013");
134
135        // Apply configuration override if present
136        let effective_config = if let Some(json_config) = config_override {
137            if let Some(obj) = json_config.as_object() {
138                let mut config = self.config.clone();
139                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
140                    config.line_length = crate::types::LineLength::new(line_length as usize);
141                }
142                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
143                    config.code_blocks = code_blocks;
144                }
145                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
146                    config.tables = tables;
147                }
148                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
149                    config.headings = headings;
150                }
151                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
152                    config.strict = strict;
153                }
154                if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
155                    config.reflow = reflow;
156                }
157                if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
158                    config.reflow_mode = match reflow_mode {
159                        "default" => ReflowMode::Default,
160                        "normalize" => ReflowMode::Normalize,
161                        "sentence-per-line" => ReflowMode::SentencePerLine,
162                        "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
163                        _ => ReflowMode::default(),
164                    };
165                }
166                config
167            } else {
168                self.config.clone()
169            }
170        } else {
171            self.config.clone()
172        };
173
174        // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
175        // But don't skip if we're in reflow mode with Normalize or SentencePerLine
176        if self.should_skip_with_config(ctx, &effective_config)
177            && !(effective_config.reflow
178                && (effective_config.reflow_mode == ReflowMode::Normalize
179                    || effective_config.reflow_mode == ReflowMode::SentencePerLine
180                    || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
181        {
182            return Ok(Vec::new());
183        }
184
185        // Direct implementation without DocumentStructure
186        let mut warnings = Vec::new();
187
188        // Special handling: line_length = 0 means "no line length limit"
189        // Skip all line length checks, but still allow reflow if enabled
190        let skip_length_checks = effective_config.line_length.is_unlimited();
191
192        // Pre-filter lines that could be problematic to avoid processing all lines
193        let mut candidate_lines = Vec::new();
194        if !skip_length_checks {
195            for (line_idx, line_info) in ctx.lines.iter().enumerate() {
196                // Skip front matter - it should never be linted
197                if line_info.in_front_matter {
198                    continue;
199                }
200
201                // Quick length check first
202                if line_info.byte_len > effective_config.line_length.get() {
203                    candidate_lines.push(line_idx);
204                }
205            }
206        }
207
208        // If no candidate lines and not in normalize or sentence-per-line mode, early return
209        if candidate_lines.is_empty()
210            && !(effective_config.reflow
211                && (effective_config.reflow_mode == ReflowMode::Normalize
212                    || effective_config.reflow_mode == ReflowMode::SentencePerLine
213                    || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
214        {
215            return Ok(warnings);
216        }
217
218        let lines = ctx.raw_lines();
219
220        // Create a quick lookup set for heading lines
221        // We need this for both the heading skip check AND the paragraphs check
222        let heading_lines_set: std::collections::HashSet<usize> = ctx
223            .lines
224            .iter()
225            .enumerate()
226            .filter(|(_, line)| line.heading.is_some())
227            .map(|(idx, _)| idx + 1)
228            .collect();
229
230        // Use pre-computed table blocks from context
231        // We need this for both the table skip check AND the paragraphs check
232        let table_blocks = &ctx.table_blocks;
233        let mut table_lines_set = std::collections::HashSet::new();
234        for table in table_blocks {
235            table_lines_set.insert(table.header_line + 1);
236            table_lines_set.insert(table.delimiter_line + 1);
237            for &line in &table.content_lines {
238                table_lines_set.insert(line + 1);
239            }
240        }
241
242        // Process candidate lines for line length checks
243        for &line_idx in &candidate_lines {
244            let line_number = line_idx + 1;
245            let line = lines[line_idx];
246
247            // Calculate actual line length
248            let effective_length = self.calculate_effective_length(line);
249
250            // Use single line length limit for all content
251            let line_limit = effective_config.line_length.get();
252
253            // Skip short lines immediately
254            if effective_length <= line_limit {
255                continue;
256            }
257
258            // Skip mkdocstrings blocks (already handled by LintContext)
259            if ctx.lines[line_idx].in_mkdocstrings {
260                continue;
261            }
262
263            // Skip various block types efficiently
264            if !effective_config.strict {
265                // Skip setext heading underlines
266                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
267                    continue;
268                }
269
270                // Skip block elements according to config flags
271                // The flags mean: true = check these elements, false = skip these elements
272                // So we skip when the flag is FALSE and the line is in that element type
273                if (!effective_config.headings && heading_lines_set.contains(&line_number))
274                    || (!effective_config.code_blocks
275                        && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
276                    || (!effective_config.tables && table_lines_set.contains(&line_number))
277                    || ctx.lines[line_number - 1].blockquote.is_some()
278                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
279                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
280                    || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
281                    || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
282                    || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
283                {
284                    continue;
285                }
286
287                // Check if this is a paragraph/regular text line
288                // If paragraphs = false, skip lines that are NOT in special blocks
289                if !effective_config.paragraphs {
290                    let is_special_block = heading_lines_set.contains(&line_number)
291                        || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
292                        || table_lines_set.contains(&line_number)
293                        || ctx.lines[line_number - 1].blockquote.is_some()
294                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
295                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
296                        || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
297                        || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
298                        || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
299                        || ctx
300                            .line_info(line_number)
301                            .is_some_and(|info| info.in_mkdocs_container());
302
303                    // Skip regular paragraph text when paragraphs = false
304                    if !is_special_block {
305                        continue;
306                    }
307                }
308
309                // Skip lines that are only a URL, image ref, or link ref
310                if self.should_ignore_line(line, lines, line_idx, ctx) {
311                    continue;
312                }
313            }
314
315            // In sentence-per-line mode, check if this is a single long sentence
316            // If so, emit a warning without a fix (user must manually rephrase)
317            if effective_config.reflow_mode == ReflowMode::SentencePerLine {
318                let sentences = split_into_sentences(line.trim());
319                if sentences.len() == 1 {
320                    // Single sentence that's too long - warn but don't auto-fix
321                    let message = format!("Line length {effective_length} exceeds {line_limit} characters");
322
323                    let (start_line, start_col, end_line, end_col) =
324                        calculate_excess_range(line_number, line, line_limit);
325
326                    warnings.push(LintWarning {
327                        rule_name: Some(self.name().to_string()),
328                        message,
329                        line: start_line,
330                        column: start_col,
331                        end_line,
332                        end_column: end_col,
333                        severity: Severity::Warning,
334                        fix: None, // No auto-fix for long single sentences
335                    });
336                    continue;
337                }
338                // Multiple sentences will be handled by paragraph-based reflow
339                continue;
340            }
341
342            // In semantic-line-breaks mode, skip per-line checks —
343            // all reflow is handled at the paragraph level with cascading splits
344            if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
345                continue;
346            }
347
348            // Don't provide fix for individual lines when reflow is enabled
349            // Paragraph-based fixes will be handled separately
350            let fix = None;
351
352            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
353
354            // Calculate precise character range for the excess portion
355            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
356
357            warnings.push(LintWarning {
358                rule_name: Some(self.name().to_string()),
359                message,
360                line: start_line,
361                column: start_col,
362                end_line,
363                end_column: end_col,
364                severity: Severity::Warning,
365                fix,
366            });
367        }
368
369        // If reflow is enabled, generate paragraph-based fixes
370        if effective_config.reflow {
371            let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
372            // Merge paragraph warnings with line warnings, removing duplicates
373            for pw in paragraph_warnings {
374                // Remove any line warnings that overlap with this paragraph
375                warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
376                warnings.push(pw);
377            }
378        }
379
380        Ok(warnings)
381    }
382
383    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
384        // For CLI usage, apply fixes from warnings
385        // LSP will use the warning-based fixes directly
386        let warnings = self.check(ctx)?;
387
388        // If there are no fixes, return content unchanged
389        if !warnings.iter().any(|w| w.fix.is_some()) {
390            return Ok(ctx.content.to_string());
391        }
392
393        // Apply warning-based fixes
394        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
395            .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
396    }
397
398    fn as_any(&self) -> &dyn std::any::Any {
399        self
400    }
401
402    fn category(&self) -> RuleCategory {
403        RuleCategory::Whitespace
404    }
405
406    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
407        self.should_skip_with_config(ctx, &self.config)
408    }
409
410    fn default_config_section(&self) -> Option<(String, toml::Value)> {
411        let default_config = MD013Config::default();
412        let json_value = serde_json::to_value(&default_config).ok()?;
413        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
414
415        if let toml::Value::Table(table) = toml_value {
416            if !table.is_empty() {
417                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
418            } else {
419                None
420            }
421        } else {
422            None
423        }
424    }
425
426    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
427        let mut aliases = std::collections::HashMap::new();
428        aliases.insert("enable_reflow".to_string(), "reflow".to_string());
429        Some(aliases)
430    }
431
432    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
433    where
434        Self: Sized,
435    {
436        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
437        // Use global line_length if rule-specific config still has default value
438        if rule_config.line_length.get() == 80 {
439            rule_config.line_length = config.global.line_length;
440        }
441        Box::new(Self::from_config_struct(rule_config))
442    }
443}
444
445impl MD013LineLength {
446    /// Generate paragraph-based fixes
447    fn generate_paragraph_fixes(
448        &self,
449        ctx: &crate::lint_context::LintContext,
450        config: &MD013Config,
451        lines: &[&str],
452    ) -> Vec<LintWarning> {
453        let mut warnings = Vec::new();
454        let line_index = LineIndex::new(ctx.content);
455
456        let mut i = 0;
457        while i < lines.len() {
458            let line_num = i + 1;
459
460            // Skip special structures (but NOT MkDocs containers - those get special handling)
461            let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
462                info.in_code_block
463                    || info.in_front_matter
464                    || info.in_html_block
465                    || info.in_html_comment
466                    || info.in_esm_block
467                    || info.in_jsx_expression
468                    || info.in_mdx_comment
469            });
470
471            if should_skip_due_to_line_info
472                || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
473                || lines[i].trim().starts_with('#')
474                || TableUtils::is_potential_table_row(lines[i])
475                || lines[i].trim().is_empty()
476                || is_horizontal_rule(lines[i].trim())
477                || is_template_directive_only(lines[i])
478                || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
479                || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
480            {
481                i += 1;
482                continue;
483            }
484
485            // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
486            if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
487                let container_start = i;
488
489                // Detect the actual indent level from the first content line
490                // (supports nested admonitions with 8+ spaces)
491                let first_line = lines[i];
492                let base_indent_len = first_line.len() - first_line.trim_start().len();
493                let base_indent: String = " ".repeat(base_indent_len);
494
495                // Collect consecutive MkDocs container paragraph lines
496                let mut container_lines: Vec<&str> = Vec::new();
497                while i < lines.len() {
498                    let current_line_num = i + 1;
499                    let line_info = ctx.line_info(current_line_num);
500
501                    // Stop if we leave the MkDocs container
502                    if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
503                        break;
504                    }
505
506                    let line = lines[i];
507
508                    // Stop at paragraph boundaries within the container
509                    if line.trim().is_empty() {
510                        break;
511                    }
512
513                    // Skip list items, code blocks, headings within containers
514                    if is_list_item(line.trim())
515                        || line.trim().starts_with("```")
516                        || line.trim().starts_with("~~~")
517                        || line.trim().starts_with('#')
518                    {
519                        break;
520                    }
521
522                    container_lines.push(line);
523                    i += 1;
524                }
525
526                if container_lines.is_empty() {
527                    // Must advance i to avoid infinite loop when we encounter
528                    // non-paragraph content (code block, list, heading, empty line)
529                    // at the start of an MkDocs container
530                    i += 1;
531                    continue;
532                }
533
534                // Strip the base indent from each line and join for reflow
535                let stripped_lines: Vec<&str> = container_lines
536                    .iter()
537                    .map(|line| {
538                        if line.starts_with(&base_indent) {
539                            &line[base_indent_len..]
540                        } else {
541                            line.trim_start()
542                        }
543                    })
544                    .collect();
545                let paragraph_text = stripped_lines.join(" ");
546
547                // Check if reflow is needed
548                let needs_reflow = match config.reflow_mode {
549                    ReflowMode::Normalize => container_lines.len() > 1,
550                    ReflowMode::SentencePerLine => {
551                        let sentences = split_into_sentences(&paragraph_text);
552                        sentences.len() > 1 || container_lines.len() > 1
553                    }
554                    ReflowMode::SemanticLineBreaks => {
555                        let sentences = split_into_sentences(&paragraph_text);
556                        sentences.len() > 1
557                            || container_lines.len() > 1
558                            || container_lines
559                                .iter()
560                                .any(|line| self.calculate_effective_length(line) > config.line_length.get())
561                    }
562                    ReflowMode::Default => container_lines
563                        .iter()
564                        .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
565                };
566
567                if !needs_reflow {
568                    continue;
569                }
570
571                // Calculate byte range for this container paragraph
572                let start_range = line_index.whole_line_range(container_start + 1);
573                let end_line = container_start + container_lines.len() - 1;
574                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
575                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
576                } else {
577                    line_index.whole_line_range(end_line + 1)
578                };
579                let byte_range = start_range.start..end_range.end;
580
581                // Reflow with adjusted line length (accounting for the 4-space indent)
582                let reflow_line_length = if config.line_length.is_unlimited() {
583                    usize::MAX
584                } else {
585                    config.line_length.get().saturating_sub(base_indent_len).max(1)
586                };
587                let reflow_options = crate::utils::text_reflow::ReflowOptions {
588                    line_length: reflow_line_length,
589                    break_on_sentences: true,
590                    preserve_breaks: false,
591                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
592                    semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
593                    abbreviations: config.abbreviations_for_reflow(),
594                };
595                let reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
596
597                // Re-add the 4-space indent to each reflowed line
598                let reflowed_with_indent: Vec<String> =
599                    reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
600                let reflowed_text = reflowed_with_indent.join("\n");
601
602                // Preserve trailing newline
603                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
604                    format!("{reflowed_text}\n")
605                } else {
606                    reflowed_text
607                };
608
609                // Only generate a warning if the replacement is different
610                let original_text = &ctx.content[byte_range.clone()];
611                if original_text != replacement {
612                    warnings.push(LintWarning {
613                        rule_name: Some(self.name().to_string()),
614                        message: format!(
615                            "Line length {} exceeds {} characters (in MkDocs container)",
616                            container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
617                            config.line_length.get()
618                        ),
619                        line: container_start + 1,
620                        column: 1,
621                        end_line: end_line + 1,
622                        end_column: lines[end_line].len() + 1,
623                        severity: Severity::Warning,
624                        fix: Some(crate::rule::Fix {
625                            range: byte_range,
626                            replacement,
627                        }),
628                    });
629                }
630                continue;
631            }
632
633            // Helper function to detect semantic line markers
634            let is_semantic_line = |content: &str| -> bool {
635                let trimmed = content.trim_start();
636                let semantic_markers = [
637                    "NOTE:",
638                    "WARNING:",
639                    "IMPORTANT:",
640                    "CAUTION:",
641                    "TIP:",
642                    "DANGER:",
643                    "HINT:",
644                    "INFO:",
645                ];
646                semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
647            };
648
649            // Helper function to detect fence markers (opening or closing)
650            let is_fence_marker = |content: &str| -> bool {
651                let trimmed = content.trim_start();
652                trimmed.starts_with("```") || trimmed.starts_with("~~~")
653            };
654
655            // Check if this is a list item - handle it specially
656            let trimmed = lines[i].trim();
657            if is_list_item(trimmed) {
658                // Collect the entire list item including continuation lines
659                let list_start = i;
660                let (marker, first_content) = extract_list_marker_and_content(lines[i]);
661                let marker_len = marker.len();
662
663                // Track lines and their types (content, code block, fence, nested list)
664                #[derive(Clone)]
665                enum LineType {
666                    Content(String),
667                    CodeBlock(String, usize),      // content and original indent
668                    NestedListItem(String, usize), // full line content and original indent
669                    SemanticLine(String),          // Lines starting with NOTE:, WARNING:, etc that should stay separate
670                    SnippetLine(String),           // MkDocs Snippets delimiters (-8<-) that must stay on their own line
671                    DivMarker(String),             // Quarto/Pandoc div markers (::: opening or closing)
672                    Empty,
673                }
674
675                let mut actual_indent: Option<usize> = None;
676                let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
677                i += 1;
678
679                // Collect continuation lines using ctx.lines for metadata
680                while i < lines.len() {
681                    let line_info = &ctx.lines[i];
682
683                    // Use pre-computed is_blank from ctx
684                    if line_info.is_blank {
685                        // Empty line - check if next line is indented (part of list item)
686                        if i + 1 < lines.len() {
687                            let next_info = &ctx.lines[i + 1];
688
689                            // Check if next line is indented enough to be continuation
690                            if !next_info.is_blank && next_info.indent >= marker_len {
691                                // This blank line is between paragraphs/blocks in the list item
692                                list_item_lines.push(LineType::Empty);
693                                i += 1;
694                                continue;
695                            }
696                        }
697                        // No indented line after blank, end of list item
698                        break;
699                    }
700
701                    // Use pre-computed indent from ctx
702                    let indent = line_info.indent;
703
704                    // Valid continuation must be indented at least marker_len
705                    if indent >= marker_len {
706                        let trimmed = line_info.content(ctx.content).trim();
707
708                        // Use pre-computed in_code_block from ctx
709                        if line_info.in_code_block {
710                            list_item_lines.push(LineType::CodeBlock(
711                                line_info.content(ctx.content)[indent..].to_string(),
712                                indent,
713                            ));
714                            i += 1;
715                            continue;
716                        }
717
718                        // Check if this is a SIBLING list item (breaks parent)
719                        // Nested lists are indented >= marker_len and are PART of the parent item
720                        // Siblings are at indent < marker_len (at or before parent marker)
721                        if is_list_item(trimmed) && indent < marker_len {
722                            // This is a sibling item at same or higher level - end parent item
723                            break;
724                        }
725
726                        // Check if this is a NESTED list item marker
727                        // Nested lists should be processed separately UNLESS they're part of a
728                        // multi-paragraph list item (indicated by a blank line before them OR
729                        // it's a continuation of an already-started nested list)
730                        if is_list_item(trimmed) && indent >= marker_len {
731                            // Check if there was a blank line before this (multi-paragraph context)
732                            let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
733
734                            // Check if we've already seen nested list content (another nested item)
735                            let has_nested_content = list_item_lines.iter().any(|line| {
736                                matches!(line, LineType::Content(c) if is_list_item(c.trim()))
737                                    || matches!(line, LineType::NestedListItem(_, _))
738                            });
739
740                            if !has_blank_before && !has_nested_content {
741                                // Single-paragraph context with no prior nested items: starts a new item
742                                // End parent collection; nested list will be processed next
743                                break;
744                            }
745                            // else: multi-paragraph context or continuation of nested list, keep collecting
746                            // Mark this as a nested list item to preserve its structure
747                            list_item_lines.push(LineType::NestedListItem(
748                                line_info.content(ctx.content)[indent..].to_string(),
749                                indent,
750                            ));
751                            i += 1;
752                            continue;
753                        }
754
755                        // Normal continuation: marker_len to marker_len+3
756                        if indent <= marker_len + 3 {
757                            // Set actual_indent from first non-code continuation if not set
758                            if actual_indent.is_none() {
759                                actual_indent = Some(indent);
760                            }
761
762                            // Extract content (remove indentation and trailing whitespace)
763                            // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
764                            // See: https://github.com/rvben/rumdl/issues/76
765                            let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
766
767                            // Check if this is a div marker (::: opening or closing)
768                            // These must be preserved on their own line, not merged into paragraphs
769                            if line_info.is_div_marker {
770                                list_item_lines.push(LineType::DivMarker(content));
771                            }
772                            // Check if this is a fence marker (opening or closing)
773                            // These should be treated as code block lines, not paragraph content
774                            else if is_fence_marker(&content) {
775                                list_item_lines.push(LineType::CodeBlock(content, indent));
776                            }
777                            // Check if this is a semantic line (NOTE:, WARNING:, etc.)
778                            else if is_semantic_line(&content) {
779                                list_item_lines.push(LineType::SemanticLine(content));
780                            }
781                            // Check if this is a snippet block delimiter (-8<- or --8<--)
782                            // These must be preserved on their own lines for MkDocs Snippets extension
783                            else if is_snippet_block_delimiter(&content) {
784                                list_item_lines.push(LineType::SnippetLine(content));
785                            } else {
786                                list_item_lines.push(LineType::Content(content));
787                            }
788                            i += 1;
789                        } else {
790                            // indent >= marker_len + 4: indented code block
791                            list_item_lines.push(LineType::CodeBlock(
792                                line_info.content(ctx.content)[indent..].to_string(),
793                                indent,
794                            ));
795                            i += 1;
796                        }
797                    } else {
798                        // Not indented enough, end of list item
799                        break;
800                    }
801                }
802
803                // Use detected indent or fallback to marker length
804                let indent_size = actual_indent.unwrap_or(marker_len);
805                let expected_indent = " ".repeat(indent_size);
806
807                // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, snippet lines, div markers, and HTML blocks)
808                #[derive(Clone)]
809                enum Block {
810                    Paragraph(Vec<String>), // Consecutive content lines, one entry per source line
811                    Code {
812                        lines: Vec<(String, usize)>, // (content, indent) pairs; blank lines inside the block are stored as ("", 0)
813                        has_preceding_blank: bool,   // Whether there was a blank line before this block
814                    },
815                    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items; blank lines are stored as ("", 0)
816                    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
817                    SnippetLine(String),  // MkDocs Snippets delimiter that stays on its own line without extra spacing
818                    DivMarker(String),    // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
819                    Html {
820                        lines: Vec<String>,        // HTML content preserved exactly as-is; blank lines inside are stored as empty strings
821                        has_preceding_blank: bool, // Whether there was a blank line before this block
822                    },
823                }
824
825                // HTML tag detection helpers
826                // Block-level HTML tags that trigger HTML block detection; entries must be lowercase because the tag name is lowercased before lookup
827                const BLOCK_LEVEL_TAGS: &[&str] = &[
828                    "div",
829                    "details",
830                    "summary",
831                    "section",
832                    "article",
833                    "header",
834                    "footer",
835                    "nav",
836                    "aside",
837                    "main",
838                    "table",
839                    "thead",
840                    "tbody",
841                    "tfoot",
842                    "tr",
843                    "td",
844                    "th",
845                    "ul",
846                    "ol",
847                    "li",
848                    "dl",
849                    "dt",
850                    "dd",
851                    "pre",
852                    "blockquote",
853                    "figure",
854                    "figcaption",
855                    "form",
856                    "fieldset",
857                    "legend",
858                    "hr", // HTML void element: it never has a closing tag
859                    "p",
860                    "h1",
861                    "h2",
862                    "h3",
863                    "h4",
864                    "h5",
865                    "h6",
866                    "style",
867                    "script",
868                    "noscript",
869                ];
870
871                fn is_block_html_opening_tag(line: &str) -> Option<String> {
872                    let trimmed = line.trim();
873
874                    // Check for HTML comments
875                    if trimmed.starts_with("<!--") {
876                        return Some("!--".to_string());
877                    }
878
879                    // Check for opening tags
880                    if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
881                        // Extract tag name from <tagname ...> or <tagname>
882                        let after_bracket = &trimmed[1..];
883                        if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
884                            let tag_name = after_bracket[..end].to_lowercase();
885
886                            // Only treat as block if it's a known block-level tag
887                            if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
888                                return Some(tag_name);
889                            }
890                        }
891                    }
892                    None
893                }
894
895                fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
896                    let trimmed = line.trim();
897
898                    // Special handling for HTML comments
899                    if tag_name == "!--" {
900                        return trimmed.ends_with("-->");
901                    }
902
903                    // Check for closing tags: </tagname> or </tagname ...>
904                    trimmed.starts_with(&format!("</{tag_name}>"))
905                        || trimmed.starts_with(&format!("</{tag_name}  "))
906                        || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
907                }
908
909                fn is_self_closing_tag(line: &str) -> bool {
910                    let trimmed = line.trim();
911                    trimmed.ends_with("/>")
912                }
913
914                let mut blocks: Vec<Block> = Vec::new();
915                let mut current_paragraph: Vec<String> = Vec::new();
916                let mut current_code_block: Vec<(String, usize)> = Vec::new();
917                let mut current_nested_list: Vec<(String, usize)> = Vec::new();
918                let mut current_html_block: Vec<String> = Vec::new();
919                let mut html_tag_stack: Vec<String> = Vec::new();
920                let mut in_code = false;
921                let mut in_nested_list = false;
922                let mut in_html_block = false;
923                let mut had_preceding_blank = false; // Track if we just saw an empty line
924                let mut code_block_has_preceding_blank = false; // Track blank before current code block
925                let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
926
927                for line in &list_item_lines {
928                    match line {
929                        LineType::Empty => {
930                            if in_code {
931                                current_code_block.push((String::new(), 0));
932                            } else if in_nested_list {
933                                current_nested_list.push((String::new(), 0));
934                            } else if in_html_block {
935                                // Allow blank lines inside HTML blocks
936                                current_html_block.push(String::new());
937                            } else if !current_paragraph.is_empty() {
938                                blocks.push(Block::Paragraph(current_paragraph.clone()));
939                                current_paragraph.clear();
940                            }
941                            // Mark that we saw a blank line
942                            had_preceding_blank = true;
943                        }
944                        LineType::Content(content) => {
945                            // Check if we're currently in an HTML block
946                            if in_html_block {
947                                current_html_block.push(content.clone());
948
949                                // Check if this line closes any open HTML tags
950                                if let Some(last_tag) = html_tag_stack.last() {
951                                    if is_html_closing_tag(content, last_tag) {
952                                        html_tag_stack.pop();
953
954                                        // If stack is empty, HTML block is complete
955                                        if html_tag_stack.is_empty() {
956                                            blocks.push(Block::Html {
957                                                lines: current_html_block.clone(),
958                                                has_preceding_blank: html_block_has_preceding_blank,
959                                            });
960                                            current_html_block.clear();
961                                            in_html_block = false;
962                                        }
963                                    } else if let Some(new_tag) = is_block_html_opening_tag(content) {
964                                        // Nested opening tag within HTML block
965                                        if !is_self_closing_tag(content) {
966                                            html_tag_stack.push(new_tag);
967                                        }
968                                    }
969                                }
970                                had_preceding_blank = false;
971                            } else {
972                                // Not in HTML block - check if this line starts one
973                                if let Some(tag_name) = is_block_html_opening_tag(content) {
974                                    // Flush current paragraph before starting HTML block
975                                    if in_code {
976                                        blocks.push(Block::Code {
977                                            lines: current_code_block.clone(),
978                                            has_preceding_blank: code_block_has_preceding_blank,
979                                        });
980                                        current_code_block.clear();
981                                        in_code = false;
982                                    } else if in_nested_list {
983                                        blocks.push(Block::NestedList(current_nested_list.clone()));
984                                        current_nested_list.clear();
985                                        in_nested_list = false;
986                                    } else if !current_paragraph.is_empty() {
987                                        blocks.push(Block::Paragraph(current_paragraph.clone()));
988                                        current_paragraph.clear();
989                                    }
990
991                                    // Start new HTML block
992                                    in_html_block = true;
993                                    html_block_has_preceding_blank = had_preceding_blank;
994                                    current_html_block.push(content.clone());
995
996                                    // Check if it's self-closing or needs a closing tag
997                                    if is_self_closing_tag(content) {
998                                        // Self-closing tag - complete the HTML block immediately
999                                        blocks.push(Block::Html {
1000                                            lines: current_html_block.clone(),
1001                                            has_preceding_blank: html_block_has_preceding_blank,
1002                                        });
1003                                        current_html_block.clear();
1004                                        in_html_block = false;
1005                                    } else {
1006                                        // Regular opening tag - push to stack
1007                                        html_tag_stack.push(tag_name);
1008                                    }
1009                                } else {
1010                                    // Regular content line - add to paragraph
1011                                    if in_code {
1012                                        // Switching from code to content
1013                                        blocks.push(Block::Code {
1014                                            lines: current_code_block.clone(),
1015                                            has_preceding_blank: code_block_has_preceding_blank,
1016                                        });
1017                                        current_code_block.clear();
1018                                        in_code = false;
1019                                    } else if in_nested_list {
1020                                        // Switching from nested list to content
1021                                        blocks.push(Block::NestedList(current_nested_list.clone()));
1022                                        current_nested_list.clear();
1023                                        in_nested_list = false;
1024                                    }
1025                                    current_paragraph.push(content.clone());
1026                                }
1027                                had_preceding_blank = false; // Reset after content
1028                            }
1029                        }
1030                        LineType::CodeBlock(content, indent) => {
1031                            if in_nested_list {
1032                                // Switching from nested list to code
1033                                blocks.push(Block::NestedList(current_nested_list.clone()));
1034                                current_nested_list.clear();
1035                                in_nested_list = false;
1036                            } else if in_html_block {
1037                                // Switching from HTML block to code (shouldn't happen normally, but handle it)
1038                                blocks.push(Block::Html {
1039                                    lines: current_html_block.clone(),
1040                                    has_preceding_blank: html_block_has_preceding_blank,
1041                                });
1042                                current_html_block.clear();
1043                                html_tag_stack.clear();
1044                                in_html_block = false;
1045                            }
1046                            if !in_code {
1047                                // Switching from content to code
1048                                if !current_paragraph.is_empty() {
1049                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
1050                                    current_paragraph.clear();
1051                                }
1052                                in_code = true;
1053                                // Record whether there was a blank line before this code block
1054                                code_block_has_preceding_blank = had_preceding_blank;
1055                            }
1056                            current_code_block.push((content.clone(), *indent));
1057                            had_preceding_blank = false; // Reset after code
1058                        }
1059                        LineType::NestedListItem(content, indent) => {
1060                            if in_code {
1061                                // Switching from code to nested list
1062                                blocks.push(Block::Code {
1063                                    lines: current_code_block.clone(),
1064                                    has_preceding_blank: code_block_has_preceding_blank,
1065                                });
1066                                current_code_block.clear();
1067                                in_code = false;
1068                            } else if in_html_block {
1069                                // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1070                                blocks.push(Block::Html {
1071                                    lines: current_html_block.clone(),
1072                                    has_preceding_blank: html_block_has_preceding_blank,
1073                                });
1074                                current_html_block.clear();
1075                                html_tag_stack.clear();
1076                                in_html_block = false;
1077                            }
1078                            if !in_nested_list {
1079                                // Switching from content to nested list
1080                                if !current_paragraph.is_empty() {
1081                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
1082                                    current_paragraph.clear();
1083                                }
1084                                in_nested_list = true;
1085                            }
1086                            current_nested_list.push((content.clone(), *indent));
1087                            had_preceding_blank = false; // Reset after nested list
1088                        }
1089                        LineType::SemanticLine(content) => {
1090                            // Semantic lines are standalone - flush any current block and add as separate block
1091                            if in_code {
1092                                blocks.push(Block::Code {
1093                                    lines: current_code_block.clone(),
1094                                    has_preceding_blank: code_block_has_preceding_blank,
1095                                });
1096                                current_code_block.clear();
1097                                in_code = false;
1098                            } else if in_nested_list {
1099                                blocks.push(Block::NestedList(current_nested_list.clone()));
1100                                current_nested_list.clear();
1101                                in_nested_list = false;
1102                            } else if in_html_block {
1103                                blocks.push(Block::Html {
1104                                    lines: current_html_block.clone(),
1105                                    has_preceding_blank: html_block_has_preceding_blank,
1106                                });
1107                                current_html_block.clear();
1108                                html_tag_stack.clear();
1109                                in_html_block = false;
1110                            } else if !current_paragraph.is_empty() {
1111                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1112                                current_paragraph.clear();
1113                            }
1114                            // Add semantic line as its own block
1115                            blocks.push(Block::SemanticLine(content.clone()));
1116                            had_preceding_blank = false; // Reset after semantic line
1117                        }
1118                        LineType::SnippetLine(content) => {
1119                            // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1120                            // Unlike semantic lines, snippet lines don't add extra blank lines around them
1121                            if in_code {
1122                                blocks.push(Block::Code {
1123                                    lines: current_code_block.clone(),
1124                                    has_preceding_blank: code_block_has_preceding_blank,
1125                                });
1126                                current_code_block.clear();
1127                                in_code = false;
1128                            } else if in_nested_list {
1129                                blocks.push(Block::NestedList(current_nested_list.clone()));
1130                                current_nested_list.clear();
1131                                in_nested_list = false;
1132                            } else if in_html_block {
1133                                blocks.push(Block::Html {
1134                                    lines: current_html_block.clone(),
1135                                    has_preceding_blank: html_block_has_preceding_blank,
1136                                });
1137                                current_html_block.clear();
1138                                html_tag_stack.clear();
1139                                in_html_block = false;
1140                            } else if !current_paragraph.is_empty() {
1141                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1142                                current_paragraph.clear();
1143                            }
1144                            // Add snippet line as its own block
1145                            blocks.push(Block::SnippetLine(content.clone()));
1146                            had_preceding_blank = false;
1147                        }
1148                        LineType::DivMarker(content) => {
1149                            // Div markers (::: opening or closing) are standalone structural delimiters
1150                            // Flush any current block and add as separate block
1151                            if in_code {
1152                                blocks.push(Block::Code {
1153                                    lines: current_code_block.clone(),
1154                                    has_preceding_blank: code_block_has_preceding_blank,
1155                                });
1156                                current_code_block.clear();
1157                                in_code = false;
1158                            } else if in_nested_list {
1159                                blocks.push(Block::NestedList(current_nested_list.clone()));
1160                                current_nested_list.clear();
1161                                in_nested_list = false;
1162                            } else if in_html_block {
1163                                blocks.push(Block::Html {
1164                                    lines: current_html_block.clone(),
1165                                    has_preceding_blank: html_block_has_preceding_blank,
1166                                });
1167                                current_html_block.clear();
1168                                html_tag_stack.clear();
1169                                in_html_block = false;
1170                            } else if !current_paragraph.is_empty() {
1171                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1172                                current_paragraph.clear();
1173                            }
1174                            blocks.push(Block::DivMarker(content.clone()));
1175                            had_preceding_blank = false;
1176                        }
1177                    }
1178                }
1179
1180                // Push remaining block
1181                if in_code && !current_code_block.is_empty() {
1182                    blocks.push(Block::Code {
1183                        lines: current_code_block,
1184                        has_preceding_blank: code_block_has_preceding_blank,
1185                    });
1186                } else if in_nested_list && !current_nested_list.is_empty() {
1187                    blocks.push(Block::NestedList(current_nested_list));
1188                } else if in_html_block && !current_html_block.is_empty() {
1189                    // If we still have an unclosed HTML block, push it anyway
1190                    // (malformed HTML - missing closing tag)
1191                    blocks.push(Block::Html {
1192                        lines: current_html_block,
1193                        has_preceding_blank: html_block_has_preceding_blank,
1194                    });
1195                } else if !current_paragraph.is_empty() {
1196                    blocks.push(Block::Paragraph(current_paragraph));
1197                }
1198
1199                // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1200                let content_lines: Vec<String> = list_item_lines
1201                    .iter()
1202                    .filter_map(|line| {
1203                        if let LineType::Content(s) = line {
1204                            Some(s.clone())
1205                        } else {
1206                            None
1207                        }
1208                    })
1209                    .collect();
1210
1211                // Check if we need to reflow this list item
1212                // We check the combined content to see if it exceeds length limits
1213                let combined_content = content_lines.join(" ").trim().to_string();
1214                let full_line = format!("{marker}{combined_content}");
1215
1216                // Helper to check if we should reflow in normalize mode
1217                let should_normalize = || {
1218                    // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1219                    // DO normalize if it has plain text content that spans multiple lines
1220                    let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1221                    let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1222                    let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1223                    let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1224                    let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1225                    let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1226
1227                    // If we have structural blocks but no paragraphs, don't normalize
1228                    if (has_nested_lists
1229                        || has_code_blocks
1230                        || has_semantic_lines
1231                        || has_snippet_lines
1232                        || has_div_markers)
1233                        && !has_paragraphs
1234                    {
1235                        return false;
1236                    }
1237
1238                    // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1239                    if has_paragraphs {
1240                        let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1241                        if paragraph_count > 1 {
1242                            // Multiple paragraph blocks should be normalized
1243                            return true;
1244                        }
1245
1246                        // Single paragraph block: normalize if it has multiple content lines
1247                        if content_lines.len() > 1 {
1248                            return true;
1249                        }
1250                    }
1251
1252                    false
1253                };
1254
1255                let needs_reflow = match config.reflow_mode {
1256                    ReflowMode::Normalize => {
1257                        // Only reflow if:
1258                        // 1. The combined line would exceed the limit, OR
1259                        // 2. The list item should be normalized (has multi-line plain text)
1260                        let combined_length = self.calculate_effective_length(&full_line);
1261                        if combined_length > config.line_length.get() {
1262                            true
1263                        } else {
1264                            should_normalize()
1265                        }
1266                    }
1267                    ReflowMode::SentencePerLine => {
1268                        // Check if list item has multiple sentences
1269                        let sentences = split_into_sentences(&combined_content);
1270                        sentences.len() > 1
1271                    }
1272                    ReflowMode::SemanticLineBreaks => {
1273                        let sentences = split_into_sentences(&combined_content);
1274                        sentences.len() > 1
1275                            || (list_start..i).any(|line_idx| {
1276                                self.calculate_effective_length(lines[line_idx]) > config.line_length.get()
1277                            })
1278                    }
1279                    ReflowMode::Default => {
1280                        // In default mode, only reflow if any individual line exceeds limit
1281                        (list_start..i)
1282                            .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1283                    }
1284                };
1285
1286                if needs_reflow {
1287                    let start_range = line_index.whole_line_range(list_start + 1);
1288                    let end_line = i - 1;
1289                    let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1290                        line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1291                    } else {
1292                        line_index.whole_line_range(end_line + 1)
1293                    };
1294                    let byte_range = start_range.start..end_range.end;
1295
1296                    // Reflow each block (paragraphs only; every other block kind is emitted without reflowing)
1297                    // When line_length = 0 (no limit), use a very large value for reflow
1298                    let reflow_line_length = if config.line_length.is_unlimited() {
1299                        usize::MAX
1300                    } else {
1301                        config.line_length.get().saturating_sub(indent_size).max(1)
1302                    };
1303                    let reflow_options = crate::utils::text_reflow::ReflowOptions {
1304                        line_length: reflow_line_length,
1305                        break_on_sentences: true,
1306                        preserve_breaks: false,
1307                        sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1308                        semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1309                        abbreviations: config.abbreviations_for_reflow(),
1310                    };
1311
1312                    let mut result: Vec<String> = Vec::new();
1313                    let mut is_first_block = true;
1314
1315                    for (block_idx, block) in blocks.iter().enumerate() {
1316                        match block {
1317                            Block::Paragraph(para_lines) => {
1318                                // Split the paragraph into segments at hard break boundaries
1319                                // Each segment can be reflowed independently
1320                                let segments = split_into_segments(para_lines);
1321
1322                                for (segment_idx, segment) in segments.iter().enumerate() {
1323                                    // Check if this segment ends with a hard break and what type
1324                                    let hard_break_type = segment.last().and_then(|line| {
1325                                        let line = line.strip_suffix('\r').unwrap_or(line);
1326                                        if line.ends_with('\\') {
1327                                            Some("\\")
1328                                        } else if line.ends_with("  ") {
1329                                            Some("  ")
1330                                        } else {
1331                                            None
1332                                        }
1333                                    });
1334
1335                                    // Join and reflow the segment (removing the hard break marker for processing)
1336                                    let segment_for_reflow: Vec<String> = segment
1337                                        .iter()
1338                                        .map(|line| {
1339                                            // Strip hard break marker (2 spaces or backslash) for reflow processing
1340                                            if line.ends_with('\\') {
1341                                                line[..line.len() - 1].trim_end().to_string()
1342                                            } else if line.ends_with("  ") {
1343                                                line[..line.len() - 2].trim_end().to_string()
1344                                            } else {
1345                                                line.clone()
1346                                            }
1347                                        })
1348                                        .collect();
1349
1350                                    let segment_text = segment_for_reflow.join(" ").trim().to_string();
1351                                    if !segment_text.is_empty() {
1352                                        let reflowed =
1353                                            crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1354
1355                                        if is_first_block && segment_idx == 0 {
1356                                            // First segment of first block starts with marker
1357                                            result.push(format!("{marker}{}", reflowed[0]));
1358                                            for line in reflowed.iter().skip(1) {
1359                                                result.push(format!("{expected_indent}{line}"));
1360                                            }
1361                                            is_first_block = false;
1362                                        } else {
1363                                            // Subsequent segments
1364                                            for line in reflowed {
1365                                                result.push(format!("{expected_indent}{line}"));
1366                                            }
1367                                        }
1368
1369                                        // If this segment had a hard break, add it back to the last line
1370                                        // Preserve the original hard break format (backslash or two spaces)
1371                                        if let Some(break_marker) = hard_break_type
1372                                            && let Some(last_line) = result.last_mut()
1373                                        {
1374                                            last_line.push_str(break_marker);
1375                                        }
1376                                    }
1377                                }
1378
1379                                // Add blank line after paragraph block if there's a next block
1380                                // BUT: check if next block is a code block that doesn't want a preceding blank
1381                                // Also don't add blank lines before snippet lines or div markers (they should stay tight)
1382                                if block_idx < blocks.len() - 1 {
1383                                    let next_block = &blocks[block_idx + 1];
1384                                    let should_add_blank = match next_block {
1385                                        Block::Code {
1386                                            has_preceding_blank, ..
1387                                        } => *has_preceding_blank,
1388                                        Block::SnippetLine(_) | Block::DivMarker(_) => false,
1389                                        _ => true, // For all other blocks, add blank line
1390                                    };
1391                                    if should_add_blank {
1392                                        result.push(String::new());
1393                                    }
1394                                }
1395                            }
1396                            Block::Code {
1397                                lines: code_lines,
1398                                has_preceding_blank: _,
1399                            } => {
1400                                // Preserve code blocks as-is with original indentation
1401                                // NOTE: Blank line before code block is handled by the previous block
1402                                // (see paragraph block's logic above)
1403
1404                                for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1405                                    if is_first_block && idx == 0 {
1406                                        // First line of first block gets marker
1407                                        result.push(format!(
1408                                            "{marker}{}",
1409                                            " ".repeat(orig_indent - marker_len) + content
1410                                        ));
1411                                        is_first_block = false;
1412                                    } else if content.is_empty() {
1413                                        result.push(String::new());
1414                                    } else {
1415                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1416                                    }
1417                                }
1418                            }
1419                            Block::NestedList(nested_items) => {
1420                                // Preserve nested list items as-is with original indentation
1421                                if !is_first_block {
1422                                    result.push(String::new());
1423                                }
1424
1425                                for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1426                                    if is_first_block && idx == 0 {
1427                                        // First line of first block gets marker
1428                                        result.push(format!(
1429                                            "{marker}{}",
1430                                            " ".repeat(orig_indent - marker_len) + content
1431                                        ));
1432                                        is_first_block = false;
1433                                    } else if content.is_empty() {
1434                                        result.push(String::new());
1435                                    } else {
1436                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1437                                    }
1438                                }
1439
1440                                // Add blank line after nested list if there's a next block
1441                                // Except before snippet lines, div markers, or code blocks without a preceding blank
1442                                if block_idx < blocks.len() - 1 {
1443                                    let next_block = &blocks[block_idx + 1];
1444                                    let should_add_blank = match next_block {
1445                                        Block::Code {
1446                                            has_preceding_blank, ..
1447                                        } => *has_preceding_blank,
1448                                        Block::SnippetLine(_) | Block::DivMarker(_) => false,
1449                                        _ => true, // For all other blocks, add blank line
1450                                    };
1451                                    if should_add_blank {
1452                                        result.push(String::new());
1453                                    }
1454                                }
1455                            }
1456                            Block::SemanticLine(content) => {
1457                                // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1458                                // Add blank line before if not first block
1459                                if !is_first_block {
1460                                    result.push(String::new());
1461                                }
1462
1463                                if is_first_block {
1464                                    // First block starts with marker
1465                                    result.push(format!("{marker}{content}"));
1466                                    is_first_block = false;
1467                                } else {
1468                                    // Subsequent blocks use expected indent
1469                                    result.push(format!("{expected_indent}{content}"));
1470                                }
1471
1472                                // Add blank line after semantic line if there's a next block
1473                                // Except before snippet lines, div markers, or code blocks without a preceding blank
1474                                if block_idx < blocks.len() - 1 {
1475                                    let next_block = &blocks[block_idx + 1];
1476                                    let should_add_blank = match next_block {
1477                                        Block::Code {
1478                                            has_preceding_blank, ..
1479                                        } => *has_preceding_blank,
1480                                        Block::SnippetLine(_) | Block::DivMarker(_) => false,
1481                                        _ => true, // For all other blocks, add blank line
1482                                    };
1483                                    if should_add_blank {
1484                                        result.push(String::new());
1485                                    }
1486                                }
1487                            }
1488                            Block::SnippetLine(content) => {
1489                                // Preserve snippet delimiters (-8<-) as-is on their own line
1490                                // Unlike semantic lines, snippet lines don't add extra blank lines
1491                                if is_first_block {
1492                                    // First block starts with marker
1493                                    result.push(format!("{marker}{content}"));
1494                                    is_first_block = false;
1495                                } else {
1496                                    // Subsequent blocks use expected indent
1497                                    result.push(format!("{expected_indent}{content}"));
1498                                }
1499                                // No blank lines added before or after snippet delimiters
1500                            }
1501                            Block::DivMarker(content) => {
1502                                // Preserve div markers (::: opening or closing) as-is on their own line
1503                                if is_first_block {
1504                                    result.push(format!("{marker}{content}"));
1505                                    is_first_block = false;
1506                                } else {
1507                                    result.push(format!("{expected_indent}{content}"));
1508                                }
1509                            }
1510                            Block::Html {
1511                                lines: html_lines,
1512                                has_preceding_blank: _,
1513                            } => {
1514                                // Preserve HTML blocks exactly as-is with original indentation
1515                                // NOTE: Blank line before HTML block is handled by the previous block
1516
1517                                for (idx, line) in html_lines.iter().enumerate() {
1518                                    if is_first_block && idx == 0 {
1519                                        // First line of first block gets marker
1520                                        result.push(format!("{marker}{line}"));
1521                                        is_first_block = false;
1522                                    } else if line.is_empty() {
1523                                        // Preserve blank lines inside HTML blocks
1524                                        result.push(String::new());
1525                                    } else {
1526                                        // Preserve lines with their original content (already includes indentation)
1527                                        result.push(format!("{expected_indent}{line}"));
1528                                    }
1529                                }
1530
1531                                // Add blank line after HTML block if there's a next block
1532                                if block_idx < blocks.len() - 1 {
1533                                    let next_block = &blocks[block_idx + 1];
1534                                    let should_add_blank = match next_block {
1535                                        Block::Code {
1536                                            has_preceding_blank, ..
1537                                        } => *has_preceding_blank,
1538                                        Block::Html {
1539                                            has_preceding_blank, ..
1540                                        } => *has_preceding_blank,
1541                                        Block::SnippetLine(_) | Block::DivMarker(_) => false,
1542                                        _ => true, // For all other blocks, add blank line
1543                                    };
1544                                    if should_add_blank {
1545                                        result.push(String::new());
1546                                    }
1547                                }
1548                            }
1549                        }
1550                    }
1551
1552                    let reflowed_text = result.join("\n");
1553
1554                    // Preserve trailing newline
1555                    let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1556                        format!("{reflowed_text}\n")
1557                    } else {
1558                        reflowed_text
1559                    };
1560
1561                    // Get the original text to compare
1562                    let original_text = &ctx.content[byte_range.clone()];
1563
1564                    // Only generate a warning if the replacement is different from the original
1565                    if original_text != replacement {
1566                        // Generate an appropriate message based on why reflow is needed
1567                        let message = match config.reflow_mode {
1568                            ReflowMode::SentencePerLine => {
1569                                let num_sentences = split_into_sentences(&combined_content).len();
1570                                let num_lines = content_lines.len();
1571                                if num_lines == 1 {
1572                                    // Single line with multiple sentences
1573                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1574                                } else {
1575                                    // Multiple lines - could be split sentences or mixed
1576                                    format!(
1577                                        "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1578                                    )
1579                                }
1580                            }
1581                            ReflowMode::SemanticLineBreaks => {
1582                                let num_sentences = split_into_sentences(&combined_content).len();
1583                                format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
1584                            }
1585                            ReflowMode::Normalize => {
1586                                let combined_length = self.calculate_effective_length(&full_line);
1587                                if combined_length > config.line_length.get() {
1588                                    format!(
1589                                        "Line length {} exceeds {} characters",
1590                                        combined_length,
1591                                        config.line_length.get()
1592                                    )
1593                                } else {
1594                                    "Multi-line content can be normalized".to_string()
1595                                }
1596                            }
1597                            ReflowMode::Default => {
1598                                let combined_length = self.calculate_effective_length(&full_line);
1599                                format!(
1600                                    "Line length {} exceeds {} characters",
1601                                    combined_length,
1602                                    config.line_length.get()
1603                                )
1604                            }
1605                        };
1606
1607                        warnings.push(LintWarning {
1608                            rule_name: Some(self.name().to_string()),
1609                            message,
1610                            line: list_start + 1,
1611                            column: 1,
1612                            end_line: end_line + 1,
1613                            end_column: lines[end_line].len() + 1,
1614                            severity: Severity::Warning,
1615                            fix: Some(crate::rule::Fix {
1616                                range: byte_range,
1617                                replacement,
1618                            }),
1619                        });
1620                    }
1621                }
1622                continue;
1623            }
1624
1625            // Found start of a paragraph - collect all lines in it
1626            let paragraph_start = i;
1627            let mut paragraph_lines = vec![lines[i]];
1628            i += 1;
1629
1630            while i < lines.len() {
1631                let next_line = lines[i];
1632                let next_line_num = i + 1;
1633                let next_trimmed = next_line.trim();
1634
1635                // Stop at paragraph boundaries
1636                if next_trimmed.is_empty()
1637                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1638                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1639                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1640                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1641                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1642                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1643                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1644                    || ctx
1645                        .line_info(next_line_num)
1646                        .is_some_and(|info| info.in_mkdocs_container())
1647                    || (next_line_num > 0
1648                        && next_line_num <= ctx.lines.len()
1649                        && ctx.lines[next_line_num - 1].blockquote.is_some())
1650                    || next_trimmed.starts_with('#')
1651                    || TableUtils::is_potential_table_row(next_line)
1652                    || is_list_item(next_trimmed)
1653                    || is_horizontal_rule(next_trimmed)
1654                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1655                    || is_template_directive_only(next_line)
1656                    || is_standalone_attr_list(next_line)
1657                    || is_snippet_block_delimiter(next_line)
1658                    || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
1659                {
1660                    break;
1661                }
1662
1663                // Check if the previous line ends with a hard break (2+ spaces or backslash)
1664                if i > 0 && has_hard_break(lines[i - 1]) {
1665                    // Don't include lines after hard breaks in the same paragraph
1666                    break;
1667                }
1668
1669                paragraph_lines.push(next_line);
1670                i += 1;
1671            }
1672
1673            // Combine paragraph lines into a single string for processing
1674            // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1675            let paragraph_text = paragraph_lines.join(" ");
1676
1677            // Skip reflowing if this paragraph contains definition list items
1678            // Definition lists are multi-line structures that should not be joined
1679            let contains_definition_list = paragraph_lines
1680                .iter()
1681                .any(|line| crate::utils::is_definition_list_item(line));
1682
1683            if contains_definition_list {
1684                // Don't reflow definition lists - skip this paragraph
1685                i = paragraph_start + paragraph_lines.len();
1686                continue;
1687            }
1688
1689            // Skip reflowing if this paragraph contains MkDocs Snippets markers
1690            // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1691            let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1692
1693            if contains_snippets {
1694                // Don't reflow Snippets blocks - skip this paragraph
1695                i = paragraph_start + paragraph_lines.len();
1696                continue;
1697            }
1698
1699            // Check if this paragraph needs reflowing
1700            let needs_reflow = match config.reflow_mode {
1701                ReflowMode::Normalize => {
1702                    // In normalize mode, reflow multi-line paragraphs
1703                    paragraph_lines.len() > 1
1704                }
1705                ReflowMode::SentencePerLine => {
1706                    // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1707                    // Note: we check the joined text because sentences can span multiple lines
1708                    let sentences = split_into_sentences(&paragraph_text);
1709
1710                    // Always reflow if the joined paragraph contains multiple sentences
1711                    if sentences.len() > 1 {
1712                        true
1713                    } else if paragraph_lines.len() > 1 {
1714                        // For single-sentence paragraphs spanning multiple lines:
1715                        // Reflow if they COULD fit on one line (respecting line-length constraint)
1716                        if config.line_length.is_unlimited() {
1717                            // No line-length constraint - always join single sentences
1718                            true
1719                        } else {
1720                            // Only join if it fits within line-length
1721                            let effective_length = self.calculate_effective_length(&paragraph_text);
1722                            effective_length <= config.line_length.get()
1723                        }
1724                    } else {
1725                        false
1726                    }
1727                }
1728                ReflowMode::SemanticLineBreaks => {
1729                    let sentences = split_into_sentences(&paragraph_text);
1730                    // Reflow if multiple sentences, multiple lines, or any line exceeds limit
1731                    sentences.len() > 1
1732                        || paragraph_lines.len() > 1
1733                        || paragraph_lines
1734                            .iter()
1735                            .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1736                }
1737                ReflowMode::Default => {
1738                    // In default mode, only reflow if lines exceed limit
1739                    paragraph_lines
1740                        .iter()
1741                        .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1742                }
1743            };
1744
1745            if needs_reflow {
1746                // Calculate byte range for this paragraph
1747                // Use whole_line_range for each line and combine
1748                let start_range = line_index.whole_line_range(paragraph_start + 1);
1749                let end_line = paragraph_start + paragraph_lines.len() - 1;
1750
1751                // For the last line, we want to preserve any trailing newline
1752                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1753                    // Last line without trailing newline - use line_text_range
1754                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1755                } else {
1756                    // Not the last line or has trailing newline - use whole_line_range
1757                    line_index.whole_line_range(end_line + 1)
1758                };
1759
1760                let byte_range = start_range.start..end_range.end;
1761
1762                // Check if the paragraph ends with a hard break and what type
1763                let hard_break_type = paragraph_lines.last().and_then(|line| {
1764                    let line = line.strip_suffix('\r').unwrap_or(line);
1765                    if line.ends_with('\\') {
1766                        Some("\\")
1767                    } else if line.ends_with("  ") {
1768                        Some("  ")
1769                    } else {
1770                        None
1771                    }
1772                });
1773
1774                // Reflow the paragraph
1775                // When line_length = 0 (no limit), use a very large value for reflow
1776                let reflow_line_length = if config.line_length.is_unlimited() {
1777                    usize::MAX
1778                } else {
1779                    config.line_length.get()
1780                };
1781                let reflow_options = crate::utils::text_reflow::ReflowOptions {
1782                    line_length: reflow_line_length,
1783                    break_on_sentences: true,
1784                    preserve_breaks: false,
1785                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1786                    semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1787                    abbreviations: config.abbreviations_for_reflow(),
1788                };
1789                let mut reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
1790
1791                // If the original paragraph ended with a hard break, preserve it
1792                // Preserve the original hard break format (backslash or two spaces)
1793                if let Some(break_marker) = hard_break_type
1794                    && !reflowed.is_empty()
1795                {
1796                    let last_idx = reflowed.len() - 1;
1797                    if !has_hard_break(&reflowed[last_idx]) {
1798                        reflowed[last_idx].push_str(break_marker);
1799                    }
1800                }
1801
1802                let reflowed_text = reflowed.join("\n");
1803
1804                // Preserve trailing newline if the original paragraph had one
1805                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1806                    format!("{reflowed_text}\n")
1807                } else {
1808                    reflowed_text
1809                };
1810
1811                // Get the original text to compare
1812                let original_text = &ctx.content[byte_range.clone()];
1813
1814                // Only generate a warning if the replacement is different from the original
1815                if original_text != replacement {
1816                    // Create warning with actual fix
1817                    // In default mode, report the specific line that violates
1818                    // In normalize mode, report the whole paragraph
1819                    // In sentence-per-line mode, report the entire paragraph
1820                    let (warning_line, warning_end_line) = match config.reflow_mode {
1821                        ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1822                        ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
1823                            // Highlight the entire paragraph that needs reformatting
1824                            (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1825                        }
1826                        ReflowMode::Default => {
1827                            // Find the first line that exceeds the limit
1828                            let mut violating_line = paragraph_start;
1829                            for (idx, line) in paragraph_lines.iter().enumerate() {
1830                                if self.calculate_effective_length(line) > config.line_length.get() {
1831                                    violating_line = paragraph_start + idx;
1832                                    break;
1833                                }
1834                            }
1835                            (violating_line + 1, violating_line + 1)
1836                        }
1837                    };
1838
1839                    warnings.push(LintWarning {
1840                        rule_name: Some(self.name().to_string()),
1841                        message: match config.reflow_mode {
1842                            ReflowMode::Normalize => format!(
1843                                "Paragraph could be normalized to use line length of {} characters",
1844                                config.line_length.get()
1845                            ),
1846                            ReflowMode::SentencePerLine => {
1847                                let num_sentences = split_into_sentences(&paragraph_text).len();
1848                                if paragraph_lines.len() == 1 {
1849                                    // Single line with multiple sentences
1850                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1851                                } else {
1852                                    let num_lines = paragraph_lines.len();
1853                                    // Multiple lines - could be split sentences or mixed
1854                                    format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1855                                }
1856                            },
1857                            ReflowMode::SemanticLineBreaks => {
1858                                let num_sentences = split_into_sentences(&paragraph_text).len();
1859                                format!(
1860                                    "Paragraph should use semantic line breaks ({num_sentences} sentences)"
1861                                )
1862                            },
1863                            ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1864                        },
1865                        line: warning_line,
1866                        column: 1,
1867                        end_line: warning_end_line,
1868                        end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1869                        severity: Severity::Warning,
1870                        fix: Some(crate::rule::Fix {
1871                            range: byte_range,
1872                            replacement,
1873                        }),
1874                    });
1875                }
1876            }
1877        }
1878
1879        warnings
1880    }
1881
1882    /// Calculate string length based on the configured length mode
1883    fn calculate_string_length(&self, s: &str) -> usize {
1884        match self.config.length_mode {
1885            LengthMode::Chars => s.chars().count(),
1886            LengthMode::Visual => s.width(),
1887            LengthMode::Bytes => s.len(),
1888        }
1889    }
1890
1891    /// Calculate effective line length
1892    ///
1893    /// Returns the actual display length of the line using the configured length mode.
1894    fn calculate_effective_length(&self, line: &str) -> usize {
1895        self.calculate_string_length(line)
1896    }
1897}