Skip to main content

rumdl_lib/rules/md013_line_length/
mod.rs

1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
7use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
8use crate::utils::range_utils::LineIndex;
9use crate::utils::range_utils::calculate_excess_range;
10use crate::utils::regex_cache::{
11    IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
12};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::split_into_sentences;
15use toml;
16
17mod helpers;
18pub mod md013_config;
19use helpers::{
20    extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
21    split_into_segments, trim_preserving_hard_break,
22};
23pub use md013_config::MD013Config;
24use md013_config::{LengthMode, ReflowMode};
25
26#[cfg(test)]
27mod tests;
28use unicode_width::UnicodeWidthStr;
29
/// Rule MD013 (line length).
///
/// The rule carries no state of its own beyond its configuration; it is
/// cloneable and can be default-constructed from `MD013Config::default()`.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Options controlling the rule (length limit, which block types are
    /// checked, strict mode, reflow settings).
    pub(crate) config: MD013Config,
}
34
35impl MD013LineLength {
36    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
37        Self {
38            config: MD013Config {
39                line_length: crate::types::LineLength::new(line_length),
40                code_blocks,
41                tables,
42                headings,
43                paragraphs: true, // Default to true for backwards compatibility
44                strict,
45                reflow: false,
46                reflow_mode: ReflowMode::default(),
47                length_mode: LengthMode::default(),
48                abbreviations: Vec::new(),
49            },
50        }
51    }
52
53    pub fn from_config_struct(config: MD013Config) -> Self {
54        Self { config }
55    }
56
57    fn should_ignore_line(
58        &self,
59        line: &str,
60        _lines: &[&str],
61        current_line: usize,
62        ctx: &crate::lint_context::LintContext,
63    ) -> bool {
64        if self.config.strict {
65            return false;
66        }
67
68        // Quick check for common patterns before expensive regex
69        let trimmed = line.trim();
70
71        // Only skip if the entire line is a URL (quick check first)
72        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
73            return true;
74        }
75
76        // Only skip if the entire line is an image reference (quick check first)
77        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
78            return true;
79        }
80
81        // Only skip if the entire line is a link reference (quick check first)
82        if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
83            return true;
84        }
85
86        // Code blocks with long strings (only check if in code block)
87        if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
88            && !trimmed.is_empty()
89            && !line.contains(' ')
90            && !line.contains('\t')
91        {
92            return true;
93        }
94
95        false
96    }
97
98    /// Check if rule should skip based on provided config (used for inline config support)
99    fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
100        // Skip if content is empty
101        if ctx.content.is_empty() {
102            return true;
103        }
104
105        // For sentence-per-line or normalize mode, never skip based on line length
106        if config.reflow
107            && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
108        {
109            return false;
110        }
111
112        // Quick check: if total content is shorter than line limit, definitely skip
113        if ctx.content.len() <= config.line_length.get() {
114            return true;
115        }
116
117        // Skip if no line exceeds the limit
118        !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
119    }
120}
121
122impl Rule for MD013LineLength {
123    fn name(&self) -> &'static str {
124        "MD013"
125    }
126
127    fn description(&self) -> &'static str {
128        "Line length should not be excessive"
129    }
130
131    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
132        let content = ctx.content;
133
134        // Parse inline configuration FIRST so we can use effective config for should_skip
135        let inline_config = crate::inline_config::InlineConfig::from_content(content);
136        let config_override = inline_config.get_rule_config("MD013");
137
138        // Apply configuration override if present
139        let effective_config = if let Some(json_config) = config_override {
140            if let Some(obj) = json_config.as_object() {
141                let mut config = self.config.clone();
142                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
143                    config.line_length = crate::types::LineLength::new(line_length as usize);
144                }
145                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
146                    config.code_blocks = code_blocks;
147                }
148                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
149                    config.tables = tables;
150                }
151                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
152                    config.headings = headings;
153                }
154                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
155                    config.strict = strict;
156                }
157                if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
158                    config.reflow = reflow;
159                }
160                if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
161                    config.reflow_mode = match reflow_mode {
162                        "default" => ReflowMode::Default,
163                        "normalize" => ReflowMode::Normalize,
164                        "sentence-per-line" => ReflowMode::SentencePerLine,
165                        _ => ReflowMode::default(),
166                    };
167                }
168                config
169            } else {
170                self.config.clone()
171            }
172        } else {
173            self.config.clone()
174        };
175
176        // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
177        // But don't skip if we're in reflow mode with Normalize or SentencePerLine
178        if self.should_skip_with_config(ctx, &effective_config)
179            && !(effective_config.reflow
180                && (effective_config.reflow_mode == ReflowMode::Normalize
181                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
182        {
183            return Ok(Vec::new());
184        }
185
186        // Direct implementation without DocumentStructure
187        let mut warnings = Vec::new();
188
189        // Special handling: line_length = 0 means "no line length limit"
190        // Skip all line length checks, but still allow reflow if enabled
191        let skip_length_checks = effective_config.line_length.is_unlimited();
192
193        // Pre-filter lines that could be problematic to avoid processing all lines
194        let mut candidate_lines = Vec::new();
195        if !skip_length_checks {
196            for (line_idx, line_info) in ctx.lines.iter().enumerate() {
197                // Skip front matter - it should never be linted
198                if line_info.in_front_matter {
199                    continue;
200                }
201
202                // Quick length check first
203                if line_info.byte_len > effective_config.line_length.get() {
204                    candidate_lines.push(line_idx);
205                }
206            }
207        }
208
209        // If no candidate lines and not in normalize or sentence-per-line mode, early return
210        if candidate_lines.is_empty()
211            && !(effective_config.reflow
212                && (effective_config.reflow_mode == ReflowMode::Normalize
213                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
214        {
215            return Ok(warnings);
216        }
217
218        // Use ctx.lines if available for better performance
219        let lines: Vec<&str> = if !ctx.lines.is_empty() {
220            ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
221        } else {
222            content.lines().collect()
223        };
224
225        // Create a quick lookup set for heading lines
226        // We need this for both the heading skip check AND the paragraphs check
227        let heading_lines_set: std::collections::HashSet<usize> = ctx
228            .lines
229            .iter()
230            .enumerate()
231            .filter(|(_, line)| line.heading.is_some())
232            .map(|(idx, _)| idx + 1)
233            .collect();
234
235        // Use pre-computed table blocks from context
236        // We need this for both the table skip check AND the paragraphs check
237        let table_blocks = &ctx.table_blocks;
238        let mut table_lines_set = std::collections::HashSet::new();
239        for table in table_blocks {
240            table_lines_set.insert(table.header_line + 1);
241            table_lines_set.insert(table.delimiter_line + 1);
242            for &line in &table.content_lines {
243                table_lines_set.insert(line + 1);
244            }
245        }
246
247        // Process candidate lines for line length checks
248        for &line_idx in &candidate_lines {
249            let line_number = line_idx + 1;
250            let line = lines[line_idx];
251
252            // Calculate effective length excluding unbreakable URLs
253            let effective_length = self.calculate_effective_length(line);
254
255            // Use single line length limit for all content
256            let line_limit = effective_config.line_length.get();
257
258            // Skip short lines immediately (double-check after effective length calculation)
259            if effective_length <= line_limit {
260                continue;
261            }
262
263            // Skip mkdocstrings blocks (already handled by LintContext)
264            if ctx.lines[line_idx].in_mkdocstrings {
265                continue;
266            }
267
268            // Skip various block types efficiently
269            if !effective_config.strict {
270                // Skip setext heading underlines
271                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
272                    continue;
273                }
274
275                // Skip block elements according to config flags
276                // The flags mean: true = check these elements, false = skip these elements
277                // So we skip when the flag is FALSE and the line is in that element type
278                if (!effective_config.headings && heading_lines_set.contains(&line_number))
279                    || (!effective_config.code_blocks
280                        && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
281                    || (!effective_config.tables && table_lines_set.contains(&line_number))
282                    || ctx.lines[line_number - 1].blockquote.is_some()
283                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
284                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
285                    || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
286                    || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
287                    || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
288                {
289                    continue;
290                }
291
292                // Check if this is a paragraph/regular text line
293                // If paragraphs = false, skip lines that are NOT in special blocks
294                if !effective_config.paragraphs {
295                    let is_special_block = heading_lines_set.contains(&line_number)
296                        || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
297                        || table_lines_set.contains(&line_number)
298                        || ctx.lines[line_number - 1].blockquote.is_some()
299                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
300                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
301                        || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
302                        || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
303                        || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
304                        || ctx
305                            .line_info(line_number)
306                            .is_some_and(|info| info.in_mkdocs_container());
307
308                    // Skip regular paragraph text when paragraphs = false
309                    if !is_special_block {
310                        continue;
311                    }
312                }
313
314                // Skip lines that are only a URL, image ref, or link ref
315                if self.should_ignore_line(line, &lines, line_idx, ctx) {
316                    continue;
317                }
318            }
319
320            // In sentence-per-line mode, check if this is a single long sentence
321            // If so, emit a warning without a fix (user must manually rephrase)
322            if effective_config.reflow_mode == ReflowMode::SentencePerLine {
323                let sentences = split_into_sentences(line.trim());
324                if sentences.len() == 1 {
325                    // Single sentence that's too long - warn but don't auto-fix
326                    let message = format!("Line length {effective_length} exceeds {line_limit} characters");
327
328                    let (start_line, start_col, end_line, end_col) =
329                        calculate_excess_range(line_number, line, line_limit);
330
331                    warnings.push(LintWarning {
332                        rule_name: Some(self.name().to_string()),
333                        message,
334                        line: start_line,
335                        column: start_col,
336                        end_line,
337                        end_column: end_col,
338                        severity: Severity::Warning,
339                        fix: None, // No auto-fix for long single sentences
340                    });
341                    continue;
342                }
343                // Multiple sentences will be handled by paragraph-based reflow
344                continue;
345            }
346
347            // Don't provide fix for individual lines when reflow is enabled
348            // Paragraph-based fixes will be handled separately
349            let fix = None;
350
351            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
352
353            // Calculate precise character range for the excess portion
354            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
355
356            warnings.push(LintWarning {
357                rule_name: Some(self.name().to_string()),
358                message,
359                line: start_line,
360                column: start_col,
361                end_line,
362                end_column: end_col,
363                severity: Severity::Warning,
364                fix,
365            });
366        }
367
368        // If reflow is enabled, generate paragraph-based fixes
369        if effective_config.reflow {
370            let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
371            // Merge paragraph warnings with line warnings, removing duplicates
372            for pw in paragraph_warnings {
373                // Remove any line warnings that overlap with this paragraph
374                warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
375                warnings.push(pw);
376            }
377        }
378
379        Ok(warnings)
380    }
381
382    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
383        // For CLI usage, apply fixes from warnings
384        // LSP will use the warning-based fixes directly
385        let warnings = self.check(ctx)?;
386
387        // If there are no fixes, return content unchanged
388        if !warnings.iter().any(|w| w.fix.is_some()) {
389            return Ok(ctx.content.to_string());
390        }
391
392        // Apply warning-based fixes
393        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
394            .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
395    }
396
397    fn as_any(&self) -> &dyn std::any::Any {
398        self
399    }
400
401    fn category(&self) -> RuleCategory {
402        RuleCategory::Whitespace
403    }
404
405    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
406        self.should_skip_with_config(ctx, &self.config)
407    }
408
409    fn default_config_section(&self) -> Option<(String, toml::Value)> {
410        let default_config = MD013Config::default();
411        let json_value = serde_json::to_value(&default_config).ok()?;
412        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
413
414        if let toml::Value::Table(table) = toml_value {
415            if !table.is_empty() {
416                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
417            } else {
418                None
419            }
420        } else {
421            None
422        }
423    }
424
425    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
426        let mut aliases = std::collections::HashMap::new();
427        aliases.insert("enable_reflow".to_string(), "reflow".to_string());
428        Some(aliases)
429    }
430
431    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
432    where
433        Self: Sized,
434    {
435        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
436        // Use global line_length if rule-specific config still has default value
437        if rule_config.line_length.get() == 80 {
438            rule_config.line_length = config.global.line_length;
439        }
440        Box::new(Self::from_config_struct(rule_config))
441    }
442}
443
444impl MD013LineLength {
445    /// Generate paragraph-based fixes
446    fn generate_paragraph_fixes(
447        &self,
448        ctx: &crate::lint_context::LintContext,
449        config: &MD013Config,
450        lines: &[&str],
451    ) -> Vec<LintWarning> {
452        let mut warnings = Vec::new();
453        let line_index = LineIndex::new(ctx.content);
454
455        let mut i = 0;
456        while i < lines.len() {
457            let line_num = i + 1;
458
459            // Skip special structures (but NOT MkDocs containers - those get special handling)
460            let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
461                info.in_code_block
462                    || info.in_front_matter
463                    || info.in_html_block
464                    || info.in_html_comment
465                    || info.in_esm_block
466                    || info.in_jsx_expression
467                    || info.in_mdx_comment
468            });
469
470            if should_skip_due_to_line_info
471                || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
472                || lines[i].trim().starts_with('#')
473                || TableUtils::is_potential_table_row(lines[i])
474                || lines[i].trim().is_empty()
475                || is_horizontal_rule(lines[i].trim())
476                || is_template_directive_only(lines[i])
477            {
478                i += 1;
479                continue;
480            }
481
482            // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
483            if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
484                let container_start = i;
485
486                // Detect the actual indent level from the first content line
487                // (supports nested admonitions with 8+ spaces)
488                let first_line = lines[i];
489                let base_indent_len = first_line.len() - first_line.trim_start().len();
490                let base_indent: String = " ".repeat(base_indent_len);
491
492                // Collect consecutive MkDocs container paragraph lines
493                let mut container_lines: Vec<&str> = Vec::new();
494                while i < lines.len() {
495                    let current_line_num = i + 1;
496                    let line_info = ctx.line_info(current_line_num);
497
498                    // Stop if we leave the MkDocs container
499                    if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
500                        break;
501                    }
502
503                    let line = lines[i];
504
505                    // Stop at paragraph boundaries within the container
506                    if line.trim().is_empty() {
507                        break;
508                    }
509
510                    // Skip list items, code blocks, headings within containers
511                    if is_list_item(line.trim())
512                        || line.trim().starts_with("```")
513                        || line.trim().starts_with("~~~")
514                        || line.trim().starts_with('#')
515                    {
516                        break;
517                    }
518
519                    container_lines.push(line);
520                    i += 1;
521                }
522
523                if container_lines.is_empty() {
524                    // Must advance i to avoid infinite loop when we encounter
525                    // non-paragraph content (code block, list, heading, empty line)
526                    // at the start of an MkDocs container
527                    i += 1;
528                    continue;
529                }
530
531                // Strip the base indent from each line and join for reflow
532                let stripped_lines: Vec<&str> = container_lines
533                    .iter()
534                    .map(|line| {
535                        if line.starts_with(&base_indent) {
536                            &line[base_indent_len..]
537                        } else {
538                            line.trim_start()
539                        }
540                    })
541                    .collect();
542                let paragraph_text = stripped_lines.join(" ");
543
544                // Check if reflow is needed
545                let needs_reflow = match config.reflow_mode {
546                    ReflowMode::Normalize => container_lines.len() > 1,
547                    ReflowMode::SentencePerLine => {
548                        let sentences = split_into_sentences(&paragraph_text);
549                        sentences.len() > 1 || container_lines.len() > 1
550                    }
551                    ReflowMode::Default => container_lines
552                        .iter()
553                        .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
554                };
555
556                if !needs_reflow {
557                    continue;
558                }
559
560                // Calculate byte range for this container paragraph
561                let start_range = line_index.whole_line_range(container_start + 1);
562                let end_line = container_start + container_lines.len() - 1;
563                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
564                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
565                } else {
566                    line_index.whole_line_range(end_line + 1)
567                };
568                let byte_range = start_range.start..end_range.end;
569
570                // Reflow with adjusted line length (accounting for the 4-space indent)
571                let reflow_line_length = if config.line_length.is_unlimited() {
572                    usize::MAX
573                } else {
574                    config.line_length.get().saturating_sub(base_indent_len).max(1)
575                };
576                let reflow_options = crate::utils::text_reflow::ReflowOptions {
577                    line_length: reflow_line_length,
578                    break_on_sentences: true,
579                    preserve_breaks: false,
580                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
581                    abbreviations: config.abbreviations_for_reflow(),
582                };
583                let reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
584
585                // Re-add the 4-space indent to each reflowed line
586                let reflowed_with_indent: Vec<String> =
587                    reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
588                let reflowed_text = reflowed_with_indent.join("\n");
589
590                // Preserve trailing newline
591                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
592                    format!("{reflowed_text}\n")
593                } else {
594                    reflowed_text
595                };
596
597                // Only generate a warning if the replacement is different
598                let original_text = &ctx.content[byte_range.clone()];
599                if original_text != replacement {
600                    warnings.push(LintWarning {
601                        rule_name: Some(self.name().to_string()),
602                        message: format!(
603                            "Line length {} exceeds {} characters (in MkDocs container)",
604                            container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
605                            config.line_length.get()
606                        ),
607                        line: container_start + 1,
608                        column: 1,
609                        end_line: end_line + 1,
610                        end_column: lines[end_line].len() + 1,
611                        severity: Severity::Warning,
612                        fix: Some(crate::rule::Fix {
613                            range: byte_range,
614                            replacement,
615                        }),
616                    });
617                }
618                continue;
619            }
620
621            // Helper function to detect semantic line markers
622            let is_semantic_line = |content: &str| -> bool {
623                let trimmed = content.trim_start();
624                let semantic_markers = [
625                    "NOTE:",
626                    "WARNING:",
627                    "IMPORTANT:",
628                    "CAUTION:",
629                    "TIP:",
630                    "DANGER:",
631                    "HINT:",
632                    "INFO:",
633                ];
634                semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
635            };
636
637            // Helper function to detect fence markers (opening or closing)
638            let is_fence_marker = |content: &str| -> bool {
639                let trimmed = content.trim_start();
640                trimmed.starts_with("```") || trimmed.starts_with("~~~")
641            };
642
643            // Check if this is a list item - handle it specially
644            let trimmed = lines[i].trim();
645            if is_list_item(trimmed) {
646                // Collect the entire list item including continuation lines
647                let list_start = i;
648                let (marker, first_content) = extract_list_marker_and_content(lines[i]);
649                let marker_len = marker.len();
650
651                // Track lines and their types (content, code block, fence, nested list)
652                #[derive(Clone)]
653                enum LineType {
654                    Content(String),
655                    CodeBlock(String, usize),      // content and original indent
656                    NestedListItem(String, usize), // full line content and original indent
657                    SemanticLine(String),          // Lines starting with NOTE:, WARNING:, etc that should stay separate
658                    SnippetLine(String),           // MkDocs Snippets delimiters (-8<-) that must stay on their own line
659                    Empty,
660                }
661
662                let mut actual_indent: Option<usize> = None;
663                let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
664                i += 1;
665
666                // Collect continuation lines using ctx.lines for metadata
667                while i < lines.len() {
668                    let line_info = &ctx.lines[i];
669
670                    // Use pre-computed is_blank from ctx
671                    if line_info.is_blank {
672                        // Empty line - check if next line is indented (part of list item)
673                        if i + 1 < lines.len() {
674                            let next_info = &ctx.lines[i + 1];
675
676                            // Check if next line is indented enough to be continuation
677                            if !next_info.is_blank && next_info.indent >= marker_len {
678                                // This blank line is between paragraphs/blocks in the list item
679                                list_item_lines.push(LineType::Empty);
680                                i += 1;
681                                continue;
682                            }
683                        }
684                        // No indented line after blank, end of list item
685                        break;
686                    }
687
688                    // Use pre-computed indent from ctx
689                    let indent = line_info.indent;
690
691                    // Valid continuation must be indented at least marker_len
692                    if indent >= marker_len {
693                        let trimmed = line_info.content(ctx.content).trim();
694
695                        // Use pre-computed in_code_block from ctx
696                        if line_info.in_code_block {
697                            list_item_lines.push(LineType::CodeBlock(
698                                line_info.content(ctx.content)[indent..].to_string(),
699                                indent,
700                            ));
701                            i += 1;
702                            continue;
703                        }
704
705                        // Check if this is a SIBLING list item (breaks parent)
706                        // Nested lists are indented >= marker_len and are PART of the parent item
707                        // Siblings are at indent < marker_len (at or before parent marker)
708                        if is_list_item(trimmed) && indent < marker_len {
709                            // This is a sibling item at same or higher level - end parent item
710                            break;
711                        }
712
713                        // Check if this is a NESTED list item marker
714                        // Nested lists should be processed separately UNLESS they're part of a
715                        // multi-paragraph list item (indicated by a blank line before them OR
716                        // it's a continuation of an already-started nested list)
717                        if is_list_item(trimmed) && indent >= marker_len {
718                            // Check if there was a blank line before this (multi-paragraph context)
719                            let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
720
721                            // Check if we've already seen nested list content (another nested item)
722                            let has_nested_content = list_item_lines.iter().any(|line| {
723                                matches!(line, LineType::Content(c) if is_list_item(c.trim()))
724                                    || matches!(line, LineType::NestedListItem(_, _))
725                            });
726
727                            if !has_blank_before && !has_nested_content {
728                                // Single-paragraph context with no prior nested items: starts a new item
729                                // End parent collection; nested list will be processed next
730                                break;
731                            }
732                            // else: multi-paragraph context or continuation of nested list, keep collecting
733                            // Mark this as a nested list item to preserve its structure
734                            list_item_lines.push(LineType::NestedListItem(
735                                line_info.content(ctx.content)[indent..].to_string(),
736                                indent,
737                            ));
738                            i += 1;
739                            continue;
740                        }
741
742                        // Normal continuation: marker_len to marker_len+3
743                        if indent <= marker_len + 3 {
744                            // Set actual_indent from first non-code continuation if not set
745                            if actual_indent.is_none() {
746                                actual_indent = Some(indent);
747                            }
748
749                            // Extract content (remove indentation and trailing whitespace)
750                            // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
751                            // See: https://github.com/rvben/rumdl/issues/76
752                            let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
753
754                            // Check if this is a fence marker (opening or closing)
755                            // These should be treated as code block lines, not paragraph content
756                            if is_fence_marker(&content) {
757                                list_item_lines.push(LineType::CodeBlock(content, indent));
758                            }
759                            // Check if this is a semantic line (NOTE:, WARNING:, etc.)
760                            else if is_semantic_line(&content) {
761                                list_item_lines.push(LineType::SemanticLine(content));
762                            }
763                            // Check if this is a snippet block delimiter (-8<- or --8<--)
764                            // These must be preserved on their own lines for MkDocs Snippets extension
765                            else if is_snippet_block_delimiter(&content) {
766                                list_item_lines.push(LineType::SnippetLine(content));
767                            } else {
768                                list_item_lines.push(LineType::Content(content));
769                            }
770                            i += 1;
771                        } else {
772                            // indent >= marker_len + 4: indented code block
773                            list_item_lines.push(LineType::CodeBlock(
774                                line_info.content(ctx.content)[indent..].to_string(),
775                                indent,
776                            ));
777                            i += 1;
778                        }
779                    } else {
780                        // Not indented enough, end of list item
781                        break;
782                    }
783                }
784
785                // Use detected indent or fallback to marker length
786                let indent_size = actual_indent.unwrap_or(marker_len);
787                let expected_indent = " ".repeat(indent_size);
788
                // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
                //
                // Each `Block` is one contiguous run of the collected `LineType` entries of a single
                // list item. Only `Paragraph` blocks hold plain text content; every other variant
                // carries lines that are kept on their own lines.
                #[derive(Clone)]
                enum Block {
                    // Consecutive `LineType::Content` lines that form one paragraph
                    Paragraph(Vec<String>),
                    // Consecutive `LineType::CodeBlock` lines (this includes fence marker lines)
                    Code {
                        lines: Vec<(String, usize)>, // (content, indent) pairs
                        has_preceding_blank: bool,   // Whether there was a blank line before this block
                    },
                    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
                    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
                    SnippetLine(String),  // MkDocs Snippets delimiter that stays on its own line without extra spacing
                    // Lines collected while an HTML tag opened inside the list item is still open
                    Html {
                        lines: Vec<String>,        // HTML content preserved exactly as-is
                        has_preceding_blank: bool, // Whether there was a blank line before this block
                    },
                }
805
                // HTML tag detection helpers
                //
                // Block-level HTML tags that should trigger HTML block detection.
                // Names are stored in lowercase: `is_block_html_opening_tag` lowercases the
                // candidate tag name before looking it up in this table.
                const BLOCK_LEVEL_TAGS: &[&str] = &[
                    // Generic containers, disclosure widgets, and sectioning elements
                    "div",
                    "details",
                    "summary",
                    "section",
                    "article",
                    "header",
                    "footer",
                    "nav",
                    "aside",
                    "main",
                    // Tables
                    "table",
                    "thead",
                    "tbody",
                    "tfoot",
                    "tr",
                    "td",
                    "th",
                    // Lists and description lists
                    "ul",
                    "ol",
                    "li",
                    "dl",
                    "dt",
                    "dd",
                    // Preformatted text, quotations, and figures
                    "pre",
                    "blockquote",
                    "figure",
                    "figcaption",
                    // Forms
                    "form",
                    "fieldset",
                    "legend",
                    // Thematic break, paragraphs, and headings
                    "hr",
                    "p",
                    "h1",
                    "h2",
                    "h3",
                    "h4",
                    "h5",
                    "h6",
                    // Styling and scripting
                    "style",
                    "script",
                    "noscript",
                ];
851
852                fn is_block_html_opening_tag(line: &str) -> Option<String> {
853                    let trimmed = line.trim();
854
855                    // Check for HTML comments
856                    if trimmed.starts_with("<!--") {
857                        return Some("!--".to_string());
858                    }
859
860                    // Check for opening tags
861                    if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
862                        // Extract tag name from <tagname ...> or <tagname>
863                        let after_bracket = &trimmed[1..];
864                        if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
865                            let tag_name = after_bracket[..end].to_lowercase();
866
867                            // Only treat as block if it's a known block-level tag
868                            if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
869                                return Some(tag_name);
870                            }
871                        }
872                    }
873                    None
874                }
875
876                fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
877                    let trimmed = line.trim();
878
879                    // Special handling for HTML comments
880                    if tag_name == "!--" {
881                        return trimmed.ends_with("-->");
882                    }
883
884                    // Check for closing tags: </tagname> or </tagname ...>
885                    trimmed.starts_with(&format!("</{tag_name}>"))
886                        || trimmed.starts_with(&format!("</{tag_name}  "))
887                        || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
888                }
889
890                fn is_self_closing_tag(line: &str) -> bool {
891                    let trimmed = line.trim();
892                    trimmed.ends_with("/>")
893                }
894
895                let mut blocks: Vec<Block> = Vec::new();
896                let mut current_paragraph: Vec<String> = Vec::new();
897                let mut current_code_block: Vec<(String, usize)> = Vec::new();
898                let mut current_nested_list: Vec<(String, usize)> = Vec::new();
899                let mut current_html_block: Vec<String> = Vec::new();
900                let mut html_tag_stack: Vec<String> = Vec::new();
901                let mut in_code = false;
902                let mut in_nested_list = false;
903                let mut in_html_block = false;
904                let mut had_preceding_blank = false; // Track if we just saw an empty line
905                let mut code_block_has_preceding_blank = false; // Track blank before current code block
906                let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
907
908                for line in &list_item_lines {
909                    match line {
910                        LineType::Empty => {
911                            if in_code {
912                                current_code_block.push((String::new(), 0));
913                            } else if in_nested_list {
914                                current_nested_list.push((String::new(), 0));
915                            } else if in_html_block {
916                                // Allow blank lines inside HTML blocks
917                                current_html_block.push(String::new());
918                            } else if !current_paragraph.is_empty() {
919                                blocks.push(Block::Paragraph(current_paragraph.clone()));
920                                current_paragraph.clear();
921                            }
922                            // Mark that we saw a blank line
923                            had_preceding_blank = true;
924                        }
925                        LineType::Content(content) => {
926                            // Check if we're currently in an HTML block
927                            if in_html_block {
928                                current_html_block.push(content.clone());
929
930                                // Check if this line closes any open HTML tags
931                                if let Some(last_tag) = html_tag_stack.last() {
932                                    if is_html_closing_tag(content, last_tag) {
933                                        html_tag_stack.pop();
934
935                                        // If stack is empty, HTML block is complete
936                                        if html_tag_stack.is_empty() {
937                                            blocks.push(Block::Html {
938                                                lines: current_html_block.clone(),
939                                                has_preceding_blank: html_block_has_preceding_blank,
940                                            });
941                                            current_html_block.clear();
942                                            in_html_block = false;
943                                        }
944                                    } else if let Some(new_tag) = is_block_html_opening_tag(content) {
945                                        // Nested opening tag within HTML block
946                                        if !is_self_closing_tag(content) {
947                                            html_tag_stack.push(new_tag);
948                                        }
949                                    }
950                                }
951                                had_preceding_blank = false;
952                            } else {
953                                // Not in HTML block - check if this line starts one
954                                if let Some(tag_name) = is_block_html_opening_tag(content) {
955                                    // Flush current paragraph before starting HTML block
956                                    if in_code {
957                                        blocks.push(Block::Code {
958                                            lines: current_code_block.clone(),
959                                            has_preceding_blank: code_block_has_preceding_blank,
960                                        });
961                                        current_code_block.clear();
962                                        in_code = false;
963                                    } else if in_nested_list {
964                                        blocks.push(Block::NestedList(current_nested_list.clone()));
965                                        current_nested_list.clear();
966                                        in_nested_list = false;
967                                    } else if !current_paragraph.is_empty() {
968                                        blocks.push(Block::Paragraph(current_paragraph.clone()));
969                                        current_paragraph.clear();
970                                    }
971
972                                    // Start new HTML block
973                                    in_html_block = true;
974                                    html_block_has_preceding_blank = had_preceding_blank;
975                                    current_html_block.push(content.clone());
976
977                                    // Check if it's self-closing or needs a closing tag
978                                    if is_self_closing_tag(content) {
979                                        // Self-closing tag - complete the HTML block immediately
980                                        blocks.push(Block::Html {
981                                            lines: current_html_block.clone(),
982                                            has_preceding_blank: html_block_has_preceding_blank,
983                                        });
984                                        current_html_block.clear();
985                                        in_html_block = false;
986                                    } else {
987                                        // Regular opening tag - push to stack
988                                        html_tag_stack.push(tag_name);
989                                    }
990                                } else {
991                                    // Regular content line - add to paragraph
992                                    if in_code {
993                                        // Switching from code to content
994                                        blocks.push(Block::Code {
995                                            lines: current_code_block.clone(),
996                                            has_preceding_blank: code_block_has_preceding_blank,
997                                        });
998                                        current_code_block.clear();
999                                        in_code = false;
1000                                    } else if in_nested_list {
1001                                        // Switching from nested list to content
1002                                        blocks.push(Block::NestedList(current_nested_list.clone()));
1003                                        current_nested_list.clear();
1004                                        in_nested_list = false;
1005                                    }
1006                                    current_paragraph.push(content.clone());
1007                                }
1008                                had_preceding_blank = false; // Reset after content
1009                            }
1010                        }
1011                        LineType::CodeBlock(content, indent) => {
1012                            if in_nested_list {
1013                                // Switching from nested list to code
1014                                blocks.push(Block::NestedList(current_nested_list.clone()));
1015                                current_nested_list.clear();
1016                                in_nested_list = false;
1017                            } else if in_html_block {
1018                                // Switching from HTML block to code (shouldn't happen normally, but handle it)
1019                                blocks.push(Block::Html {
1020                                    lines: current_html_block.clone(),
1021                                    has_preceding_blank: html_block_has_preceding_blank,
1022                                });
1023                                current_html_block.clear();
1024                                html_tag_stack.clear();
1025                                in_html_block = false;
1026                            }
1027                            if !in_code {
1028                                // Switching from content to code
1029                                if !current_paragraph.is_empty() {
1030                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
1031                                    current_paragraph.clear();
1032                                }
1033                                in_code = true;
1034                                // Record whether there was a blank line before this code block
1035                                code_block_has_preceding_blank = had_preceding_blank;
1036                            }
1037                            current_code_block.push((content.clone(), *indent));
1038                            had_preceding_blank = false; // Reset after code
1039                        }
1040                        LineType::NestedListItem(content, indent) => {
1041                            if in_code {
1042                                // Switching from code to nested list
1043                                blocks.push(Block::Code {
1044                                    lines: current_code_block.clone(),
1045                                    has_preceding_blank: code_block_has_preceding_blank,
1046                                });
1047                                current_code_block.clear();
1048                                in_code = false;
1049                            } else if in_html_block {
1050                                // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1051                                blocks.push(Block::Html {
1052                                    lines: current_html_block.clone(),
1053                                    has_preceding_blank: html_block_has_preceding_blank,
1054                                });
1055                                current_html_block.clear();
1056                                html_tag_stack.clear();
1057                                in_html_block = false;
1058                            }
1059                            if !in_nested_list {
1060                                // Switching from content to nested list
1061                                if !current_paragraph.is_empty() {
1062                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
1063                                    current_paragraph.clear();
1064                                }
1065                                in_nested_list = true;
1066                            }
1067                            current_nested_list.push((content.clone(), *indent));
1068                            had_preceding_blank = false; // Reset after nested list
1069                        }
1070                        LineType::SemanticLine(content) => {
1071                            // Semantic lines are standalone - flush any current block and add as separate block
1072                            if in_code {
1073                                blocks.push(Block::Code {
1074                                    lines: current_code_block.clone(),
1075                                    has_preceding_blank: code_block_has_preceding_blank,
1076                                });
1077                                current_code_block.clear();
1078                                in_code = false;
1079                            } else if in_nested_list {
1080                                blocks.push(Block::NestedList(current_nested_list.clone()));
1081                                current_nested_list.clear();
1082                                in_nested_list = false;
1083                            } else if in_html_block {
1084                                blocks.push(Block::Html {
1085                                    lines: current_html_block.clone(),
1086                                    has_preceding_blank: html_block_has_preceding_blank,
1087                                });
1088                                current_html_block.clear();
1089                                html_tag_stack.clear();
1090                                in_html_block = false;
1091                            } else if !current_paragraph.is_empty() {
1092                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1093                                current_paragraph.clear();
1094                            }
1095                            // Add semantic line as its own block
1096                            blocks.push(Block::SemanticLine(content.clone()));
1097                            had_preceding_blank = false; // Reset after semantic line
1098                        }
1099                        LineType::SnippetLine(content) => {
1100                            // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1101                            // Unlike semantic lines, snippet lines don't add extra blank lines around them
1102                            if in_code {
1103                                blocks.push(Block::Code {
1104                                    lines: current_code_block.clone(),
1105                                    has_preceding_blank: code_block_has_preceding_blank,
1106                                });
1107                                current_code_block.clear();
1108                                in_code = false;
1109                            } else if in_nested_list {
1110                                blocks.push(Block::NestedList(current_nested_list.clone()));
1111                                current_nested_list.clear();
1112                                in_nested_list = false;
1113                            } else if in_html_block {
1114                                blocks.push(Block::Html {
1115                                    lines: current_html_block.clone(),
1116                                    has_preceding_blank: html_block_has_preceding_blank,
1117                                });
1118                                current_html_block.clear();
1119                                html_tag_stack.clear();
1120                                in_html_block = false;
1121                            } else if !current_paragraph.is_empty() {
1122                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1123                                current_paragraph.clear();
1124                            }
1125                            // Add snippet line as its own block
1126                            blocks.push(Block::SnippetLine(content.clone()));
1127                            had_preceding_blank = false;
1128                        }
1129                    }
1130                }
1131
1132                // Push remaining block
1133                if in_code && !current_code_block.is_empty() {
1134                    blocks.push(Block::Code {
1135                        lines: current_code_block,
1136                        has_preceding_blank: code_block_has_preceding_blank,
1137                    });
1138                } else if in_nested_list && !current_nested_list.is_empty() {
1139                    blocks.push(Block::NestedList(current_nested_list));
1140                } else if in_html_block && !current_html_block.is_empty() {
1141                    // If we still have an unclosed HTML block, push it anyway
1142                    // (malformed HTML - missing closing tag)
1143                    blocks.push(Block::Html {
1144                        lines: current_html_block,
1145                        has_preceding_blank: html_block_has_preceding_blank,
1146                    });
1147                } else if !current_paragraph.is_empty() {
1148                    blocks.push(Block::Paragraph(current_paragraph));
1149                }
1150
1151                // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1152                let content_lines: Vec<String> = list_item_lines
1153                    .iter()
1154                    .filter_map(|line| {
1155                        if let LineType::Content(s) = line {
1156                            Some(s.clone())
1157                        } else {
1158                            None
1159                        }
1160                    })
1161                    .collect();
1162
1163                // Check if we need to reflow this list item
1164                // We check the combined content to see if it exceeds length limits
1165                let combined_content = content_lines.join(" ").trim().to_string();
1166                let full_line = format!("{marker}{combined_content}");
1167
1168                // Helper to check if we should reflow in normalize mode
1169                let should_normalize = || {
1170                    // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1171                    // DO normalize if it has plain text content that spans multiple lines
1172                    let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1173                    let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1174                    let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1175                    let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1176                    let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1177
1178                    // If we have nested lists, code blocks, semantic lines, or snippet lines but no paragraphs, don't normalize
1179                    if (has_nested_lists || has_code_blocks || has_semantic_lines || has_snippet_lines)
1180                        && !has_paragraphs
1181                    {
1182                        return false;
1183                    }
1184
1185                    // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1186                    if has_paragraphs {
1187                        let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1188                        if paragraph_count > 1 {
1189                            // Multiple paragraph blocks should be normalized
1190                            return true;
1191                        }
1192
1193                        // Single paragraph block: normalize if it has multiple content lines
1194                        if content_lines.len() > 1 {
1195                            return true;
1196                        }
1197                    }
1198
1199                    false
1200                };
1201
1202                let needs_reflow = match config.reflow_mode {
1203                    ReflowMode::Normalize => {
1204                        // Only reflow if:
1205                        // 1. The combined line would exceed the limit, OR
1206                        // 2. The list item should be normalized (has multi-line plain text)
1207                        let combined_length = self.calculate_effective_length(&full_line);
1208                        if combined_length > config.line_length.get() {
1209                            true
1210                        } else {
1211                            should_normalize()
1212                        }
1213                    }
1214                    ReflowMode::SentencePerLine => {
1215                        // Check if list item has multiple sentences
1216                        let sentences = split_into_sentences(&combined_content);
1217                        sentences.len() > 1
1218                    }
1219                    ReflowMode::Default => {
1220                        // In default mode, only reflow if any individual line exceeds limit
1221                        // Check the original lines, not the combined content
1222                        (list_start..i)
1223                            .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1224                    }
1225                };
1226
1227                if needs_reflow {
1228                    let start_range = line_index.whole_line_range(list_start + 1);
1229                    let end_line = i - 1;
1230                    let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1231                        line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1232                    } else {
1233                        line_index.whole_line_range(end_line + 1)
1234                    };
1235                    let byte_range = start_range.start..end_range.end;
1236
1237                    // Reflow each block (paragraphs only, preserve code blocks)
1238                    // When line_length = 0 (no limit), use a very large value for reflow
1239                    let reflow_line_length = if config.line_length.is_unlimited() {
1240                        usize::MAX
1241                    } else {
1242                        config.line_length.get().saturating_sub(indent_size).max(1)
1243                    };
1244                    let reflow_options = crate::utils::text_reflow::ReflowOptions {
1245                        line_length: reflow_line_length,
1246                        break_on_sentences: true,
1247                        preserve_breaks: false,
1248                        sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1249                        abbreviations: config.abbreviations_for_reflow(),
1250                    };
1251
1252                    let mut result: Vec<String> = Vec::new();
1253                    let mut is_first_block = true;
1254
1255                    for (block_idx, block) in blocks.iter().enumerate() {
1256                        match block {
1257                            Block::Paragraph(para_lines) => {
1258                                // Split the paragraph into segments at hard break boundaries
1259                                // Each segment can be reflowed independently
1260                                let segments = split_into_segments(para_lines);
1261
1262                                for (segment_idx, segment) in segments.iter().enumerate() {
1263                                    // Check if this segment ends with a hard break and what type
1264                                    let hard_break_type = segment.last().and_then(|line| {
1265                                        let line = line.strip_suffix('\r').unwrap_or(line);
1266                                        if line.ends_with('\\') {
1267                                            Some("\\")
1268                                        } else if line.ends_with("  ") {
1269                                            Some("  ")
1270                                        } else {
1271                                            None
1272                                        }
1273                                    });
1274
1275                                    // Join and reflow the segment (removing the hard break marker for processing)
1276                                    let segment_for_reflow: Vec<String> = segment
1277                                        .iter()
1278                                        .map(|line| {
1279                                            // Strip hard break marker (2 spaces or backslash) for reflow processing
1280                                            if line.ends_with('\\') {
1281                                                line[..line.len() - 1].trim_end().to_string()
1282                                            } else if line.ends_with("  ") {
1283                                                line[..line.len() - 2].trim_end().to_string()
1284                                            } else {
1285                                                line.clone()
1286                                            }
1287                                        })
1288                                        .collect();
1289
1290                                    let segment_text = segment_for_reflow.join(" ").trim().to_string();
1291                                    if !segment_text.is_empty() {
1292                                        let reflowed =
1293                                            crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1294
1295                                        if is_first_block && segment_idx == 0 {
1296                                            // First segment of first block starts with marker
1297                                            result.push(format!("{marker}{}", reflowed[0]));
1298                                            for line in reflowed.iter().skip(1) {
1299                                                result.push(format!("{expected_indent}{line}"));
1300                                            }
1301                                            is_first_block = false;
1302                                        } else {
1303                                            // Subsequent segments
1304                                            for line in reflowed {
1305                                                result.push(format!("{expected_indent}{line}"));
1306                                            }
1307                                        }
1308
1309                                        // If this segment had a hard break, add it back to the last line
1310                                        // Preserve the original hard break format (backslash or two spaces)
1311                                        if let Some(break_marker) = hard_break_type
1312                                            && let Some(last_line) = result.last_mut()
1313                                        {
1314                                            last_line.push_str(break_marker);
1315                                        }
1316                                    }
1317                                }
1318
1319                                // Add blank line after paragraph block if there's a next block
1320                                // BUT: check if next block is a code block that doesn't want a preceding blank
1321                                // Also don't add blank lines before snippet lines (they should stay tight)
1322                                if block_idx < blocks.len() - 1 {
1323                                    let next_block = &blocks[block_idx + 1];
1324                                    let should_add_blank = match next_block {
1325                                        Block::Code {
1326                                            has_preceding_blank, ..
1327                                        } => *has_preceding_blank,
1328                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1329                                        _ => true,                      // For all other blocks, add blank line
1330                                    };
1331                                    if should_add_blank {
1332                                        result.push(String::new());
1333                                    }
1334                                }
1335                            }
1336                            Block::Code {
1337                                lines: code_lines,
1338                                has_preceding_blank: _,
1339                            } => {
1340                                // Preserve code blocks as-is with original indentation
1341                                // NOTE: Blank line before code block is handled by the previous block
1342                                // (see paragraph block's logic above)
1343
1344                                for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1345                                    if is_first_block && idx == 0 {
1346                                        // First line of first block gets marker
1347                                        result.push(format!(
1348                                            "{marker}{}",
1349                                            " ".repeat(orig_indent - marker_len) + content
1350                                        ));
1351                                        is_first_block = false;
1352                                    } else if content.is_empty() {
1353                                        result.push(String::new());
1354                                    } else {
1355                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1356                                    }
1357                                }
1358                            }
1359                            Block::NestedList(nested_items) => {
1360                                // Preserve nested list items as-is with original indentation
1361                                if !is_first_block {
1362                                    result.push(String::new());
1363                                }
1364
1365                                for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1366                                    if is_first_block && idx == 0 {
1367                                        // First line of first block gets marker
1368                                        result.push(format!(
1369                                            "{marker}{}",
1370                                            " ".repeat(orig_indent - marker_len) + content
1371                                        ));
1372                                        is_first_block = false;
1373                                    } else if content.is_empty() {
1374                                        result.push(String::new());
1375                                    } else {
1376                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1377                                    }
1378                                }
1379
1380                                // Add blank line after nested list if there's a next block
1381                                // Check if next block is a code block that doesn't want a preceding blank
1382                                if block_idx < blocks.len() - 1 {
1383                                    let next_block = &blocks[block_idx + 1];
1384                                    let should_add_blank = match next_block {
1385                                        Block::Code {
1386                                            has_preceding_blank, ..
1387                                        } => *has_preceding_blank,
1388                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1389                                        _ => true,                      // For all other blocks, add blank line
1390                                    };
1391                                    if should_add_blank {
1392                                        result.push(String::new());
1393                                    }
1394                                }
1395                            }
1396                            Block::SemanticLine(content) => {
1397                                // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1398                                // Add blank line before if not first block
1399                                if !is_first_block {
1400                                    result.push(String::new());
1401                                }
1402
1403                                if is_first_block {
1404                                    // First block starts with marker
1405                                    result.push(format!("{marker}{content}"));
1406                                    is_first_block = false;
1407                                } else {
1408                                    // Subsequent blocks use expected indent
1409                                    result.push(format!("{expected_indent}{content}"));
1410                                }
1411
1412                                // Add blank line after semantic line if there's a next block
1413                                // Check if next block is a code block that doesn't want a preceding blank
1414                                if block_idx < blocks.len() - 1 {
1415                                    let next_block = &blocks[block_idx + 1];
1416                                    let should_add_blank = match next_block {
1417                                        Block::Code {
1418                                            has_preceding_blank, ..
1419                                        } => *has_preceding_blank,
1420                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1421                                        _ => true,                      // For all other blocks, add blank line
1422                                    };
1423                                    if should_add_blank {
1424                                        result.push(String::new());
1425                                    }
1426                                }
1427                            }
1428                            Block::SnippetLine(content) => {
1429                                // Preserve snippet delimiters (-8<-) as-is on their own line
1430                                // Unlike semantic lines, snippet lines don't add extra blank lines
1431                                if is_first_block {
1432                                    // First block starts with marker
1433                                    result.push(format!("{marker}{content}"));
1434                                    is_first_block = false;
1435                                } else {
1436                                    // Subsequent blocks use expected indent
1437                                    result.push(format!("{expected_indent}{content}"));
1438                                }
1439                                // No blank lines added before or after snippet delimiters
1440                            }
1441                            Block::Html {
1442                                lines: html_lines,
1443                                has_preceding_blank: _,
1444                            } => {
1445                                // Preserve HTML blocks exactly as-is with original indentation
1446                                // NOTE: Blank line before HTML block is handled by the previous block
1447
1448                                for (idx, line) in html_lines.iter().enumerate() {
1449                                    if is_first_block && idx == 0 {
1450                                        // First line of first block gets marker
1451                                        result.push(format!("{marker}{line}"));
1452                                        is_first_block = false;
1453                                    } else if line.is_empty() {
1454                                        // Preserve blank lines inside HTML blocks
1455                                        result.push(String::new());
1456                                    } else {
1457                                        // Preserve lines with their original content (already includes indentation)
1458                                        result.push(format!("{expected_indent}{line}"));
1459                                    }
1460                                }
1461
1462                                // Add blank line after HTML block if there's a next block
1463                                if block_idx < blocks.len() - 1 {
1464                                    let next_block = &blocks[block_idx + 1];
1465                                    let should_add_blank = match next_block {
1466                                        Block::Code {
1467                                            has_preceding_blank, ..
1468                                        } => *has_preceding_blank,
1469                                        Block::Html {
1470                                            has_preceding_blank, ..
1471                                        } => *has_preceding_blank,
1472                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1473                                        _ => true,                      // For all other blocks, add blank line
1474                                    };
1475                                    if should_add_blank {
1476                                        result.push(String::new());
1477                                    }
1478                                }
1479                            }
1480                        }
1481                    }
1482
1483                    let reflowed_text = result.join("\n");
1484
1485                    // Preserve trailing newline
1486                    let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1487                        format!("{reflowed_text}\n")
1488                    } else {
1489                        reflowed_text
1490                    };
1491
1492                    // Get the original text to compare
1493                    let original_text = &ctx.content[byte_range.clone()];
1494
1495                    // Only generate a warning if the replacement is different from the original
1496                    if original_text != replacement {
1497                        // Generate an appropriate message based on why reflow is needed
1498                        let message = match config.reflow_mode {
1499                            ReflowMode::SentencePerLine => {
1500                                let num_sentences = split_into_sentences(&combined_content).len();
1501                                let num_lines = content_lines.len();
1502                                if num_lines == 1 {
1503                                    // Single line with multiple sentences
1504                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1505                                } else {
1506                                    // Multiple lines - could be split sentences or mixed
1507                                    format!(
1508                                        "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1509                                    )
1510                                }
1511                            }
1512                            ReflowMode::Normalize => {
1513                                let combined_length = self.calculate_effective_length(&full_line);
1514                                if combined_length > config.line_length.get() {
1515                                    format!(
1516                                        "Line length {} exceeds {} characters",
1517                                        combined_length,
1518                                        config.line_length.get()
1519                                    )
1520                                } else {
1521                                    "Multi-line content can be normalized".to_string()
1522                                }
1523                            }
1524                            ReflowMode::Default => {
1525                                let combined_length = self.calculate_effective_length(&full_line);
1526                                format!(
1527                                    "Line length {} exceeds {} characters",
1528                                    combined_length,
1529                                    config.line_length.get()
1530                                )
1531                            }
1532                        };
1533
1534                        warnings.push(LintWarning {
1535                            rule_name: Some(self.name().to_string()),
1536                            message,
1537                            line: list_start + 1,
1538                            column: 1,
1539                            end_line: end_line + 1,
1540                            end_column: lines[end_line].len() + 1,
1541                            severity: Severity::Warning,
1542                            fix: Some(crate::rule::Fix {
1543                                range: byte_range,
1544                                replacement,
1545                            }),
1546                        });
1547                    }
1548                }
1549                continue;
1550            }
1551
1552            // Found start of a paragraph - collect all lines in it
1553            let paragraph_start = i;
1554            let mut paragraph_lines = vec![lines[i]];
1555            i += 1;
1556
1557            while i < lines.len() {
1558                let next_line = lines[i];
1559                let next_line_num = i + 1;
1560                let next_trimmed = next_line.trim();
1561
1562                // Stop at paragraph boundaries
1563                if next_trimmed.is_empty()
1564                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1565                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1566                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1567                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1568                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1569                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1570                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1571                    || ctx
1572                        .line_info(next_line_num)
1573                        .is_some_and(|info| info.in_mkdocs_container())
1574                    || (next_line_num > 0
1575                        && next_line_num <= ctx.lines.len()
1576                        && ctx.lines[next_line_num - 1].blockquote.is_some())
1577                    || next_trimmed.starts_with('#')
1578                    || TableUtils::is_potential_table_row(next_line)
1579                    || is_list_item(next_trimmed)
1580                    || is_horizontal_rule(next_trimmed)
1581                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1582                    || is_template_directive_only(next_line)
1583                    || is_standalone_attr_list(next_line)
1584                    || is_snippet_block_delimiter(next_line)
1585                {
1586                    break;
1587                }
1588
1589                // Check if the previous line ends with a hard break (2+ spaces or backslash)
1590                if i > 0 && has_hard_break(lines[i - 1]) {
1591                    // Don't include lines after hard breaks in the same paragraph
1592                    break;
1593                }
1594
1595                paragraph_lines.push(next_line);
1596                i += 1;
1597            }
1598
1599            // Combine paragraph lines into a single string for processing
1600            // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1601            let paragraph_text = paragraph_lines.join(" ");
1602
1603            // Skip reflowing if this paragraph contains definition list items
1604            // Definition lists are multi-line structures that should not be joined
1605            let contains_definition_list = paragraph_lines
1606                .iter()
1607                .any(|line| crate::utils::is_definition_list_item(line));
1608
1609            if contains_definition_list {
1610                // Don't reflow definition lists - skip this paragraph
1611                i = paragraph_start + paragraph_lines.len();
1612                continue;
1613            }
1614
1615            // Skip reflowing if this paragraph contains MkDocs Snippets markers
1616            // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1617            let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1618
1619            if contains_snippets {
1620                // Don't reflow Snippets blocks - skip this paragraph
1621                i = paragraph_start + paragraph_lines.len();
1622                continue;
1623            }
1624
1625            // Check if this paragraph needs reflowing
1626            let needs_reflow = match config.reflow_mode {
1627                ReflowMode::Normalize => {
1628                    // In normalize mode, reflow multi-line paragraphs
1629                    paragraph_lines.len() > 1
1630                }
1631                ReflowMode::SentencePerLine => {
1632                    // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1633                    // Note: we check the joined text because sentences can span multiple lines
1634                    let sentences = split_into_sentences(&paragraph_text);
1635
1636                    // Always reflow if multiple sentences on one line
1637                    if sentences.len() > 1 {
1638                        true
1639                    } else if paragraph_lines.len() > 1 {
1640                        // For single-sentence paragraphs spanning multiple lines:
1641                        // Reflow if they COULD fit on one line (respecting line-length constraint)
1642                        if config.line_length.is_unlimited() {
1643                            // No line-length constraint - always join single sentences
1644                            true
1645                        } else {
1646                            // Only join if it fits within line-length
1647                            let effective_length = self.calculate_effective_length(&paragraph_text);
1648                            effective_length <= config.line_length.get()
1649                        }
1650                    } else {
1651                        false
1652                    }
1653                }
1654                ReflowMode::Default => {
1655                    // In default mode, only reflow if lines exceed limit
1656                    paragraph_lines
1657                        .iter()
1658                        .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1659                }
1660            };
1661
1662            if needs_reflow {
1663                // Calculate byte range for this paragraph
1664                // Use whole_line_range for each line and combine
1665                let start_range = line_index.whole_line_range(paragraph_start + 1);
1666                let end_line = paragraph_start + paragraph_lines.len() - 1;
1667
1668                // For the last line, we want to preserve any trailing newline
1669                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1670                    // Last line without trailing newline - use line_text_range
1671                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1672                } else {
1673                    // Not the last line or has trailing newline - use whole_line_range
1674                    line_index.whole_line_range(end_line + 1)
1675                };
1676
1677                let byte_range = start_range.start..end_range.end;
1678
1679                // Check if the paragraph ends with a hard break and what type
1680                let hard_break_type = paragraph_lines.last().and_then(|line| {
1681                    let line = line.strip_suffix('\r').unwrap_or(line);
1682                    if line.ends_with('\\') {
1683                        Some("\\")
1684                    } else if line.ends_with("  ") {
1685                        Some("  ")
1686                    } else {
1687                        None
1688                    }
1689                });
1690
1691                // Reflow the paragraph
1692                // When line_length = 0 (no limit), use a very large value for reflow
1693                let reflow_line_length = if config.line_length.is_unlimited() {
1694                    usize::MAX
1695                } else {
1696                    config.line_length.get()
1697                };
1698                let reflow_options = crate::utils::text_reflow::ReflowOptions {
1699                    line_length: reflow_line_length,
1700                    break_on_sentences: true,
1701                    preserve_breaks: false,
1702                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1703                    abbreviations: config.abbreviations_for_reflow(),
1704                };
1705                let mut reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
1706
1707                // If the original paragraph ended with a hard break, preserve it
1708                // Preserve the original hard break format (backslash or two spaces)
1709                if let Some(break_marker) = hard_break_type
1710                    && !reflowed.is_empty()
1711                {
1712                    let last_idx = reflowed.len() - 1;
1713                    if !has_hard_break(&reflowed[last_idx]) {
1714                        reflowed[last_idx].push_str(break_marker);
1715                    }
1716                }
1717
1718                let reflowed_text = reflowed.join("\n");
1719
1720                // Preserve trailing newline if the original paragraph had one
1721                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1722                    format!("{reflowed_text}\n")
1723                } else {
1724                    reflowed_text
1725                };
1726
1727                // Get the original text to compare
1728                let original_text = &ctx.content[byte_range.clone()];
1729
1730                // Only generate a warning if the replacement is different from the original
1731                if original_text != replacement {
1732                    // Create warning with actual fix
1733                    // In default mode, report the specific line that violates
1734                    // In normalize mode, report the whole paragraph
1735                    // In sentence-per-line mode, report the entire paragraph
1736                    let (warning_line, warning_end_line) = match config.reflow_mode {
1737                        ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1738                        ReflowMode::SentencePerLine => {
1739                            // Highlight the entire paragraph that needs reformatting
1740                            (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1741                        }
1742                        ReflowMode::Default => {
1743                            // Find the first line that exceeds the limit
1744                            let mut violating_line = paragraph_start;
1745                            for (idx, line) in paragraph_lines.iter().enumerate() {
1746                                if self.calculate_effective_length(line) > config.line_length.get() {
1747                                    violating_line = paragraph_start + idx;
1748                                    break;
1749                                }
1750                            }
1751                            (violating_line + 1, violating_line + 1)
1752                        }
1753                    };
1754
1755                    warnings.push(LintWarning {
1756                        rule_name: Some(self.name().to_string()),
1757                        message: match config.reflow_mode {
1758                            ReflowMode::Normalize => format!(
1759                                "Paragraph could be normalized to use line length of {} characters",
1760                                config.line_length.get()
1761                            ),
1762                            ReflowMode::SentencePerLine => {
1763                                let num_sentences = split_into_sentences(&paragraph_text).len();
1764                                if paragraph_lines.len() == 1 {
1765                                    // Single line with multiple sentences
1766                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1767                                } else {
1768                                    let num_lines = paragraph_lines.len();
1769                                    // Multiple lines - could be split sentences or mixed
1770                                    format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1771                                }
1772                            },
1773                            ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1774                        },
1775                        line: warning_line,
1776                        column: 1,
1777                        end_line: warning_end_line,
1778                        end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1779                        severity: Severity::Warning,
1780                        fix: Some(crate::rule::Fix {
1781                            range: byte_range,
1782                            replacement,
1783                        }),
1784                    });
1785                }
1786            }
1787        }
1788
1789        warnings
1790    }
1791
1792    /// Calculate string length based on the configured length mode
1793    fn calculate_string_length(&self, s: &str) -> usize {
1794        match self.config.length_mode {
1795            LengthMode::Chars => s.chars().count(),
1796            LengthMode::Visual => s.width(),
1797            LengthMode::Bytes => s.len(),
1798        }
1799    }
1800
1801    /// Calculate effective line length excluding unbreakable URLs
1802    fn calculate_effective_length(&self, line: &str) -> usize {
1803        if self.config.strict {
1804            // In strict mode, count everything
1805            return self.calculate_string_length(line);
1806        }
1807
1808        // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1809        let bytes = line.as_bytes();
1810        if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1811            return self.calculate_string_length(line);
1812        }
1813
1814        // More precise check for URLs and links
1815        if !line.contains("http") && !line.contains('[') {
1816            return self.calculate_string_length(line);
1817        }
1818
1819        let mut effective_line = line.to_string();
1820
1821        // First handle markdown links to avoid double-counting URLs
1822        // Pattern: [text](very-long-url) -> [text](url)
1823        if line.contains('[') && line.contains("](") {
1824            for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1825                if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1826                    && url.as_str().len() > 15
1827                {
1828                    let replacement = format!("[{}](url)", text.as_str());
1829                    effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1830                }
1831            }
1832        }
1833
1834        // Then replace bare URLs with a placeholder of reasonable length
1835        // This allows lines with long URLs to pass if the rest of the content is reasonable
1836        if effective_line.contains("http") {
1837            for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1838                let url = url_match.as_str();
1839                // Skip if this URL is already part of a markdown link we handled
1840                if !effective_line.contains(&format!("({url})")) {
1841                    // Replace URL with placeholder that represents a "reasonable" URL length
1842                    // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1843                    let placeholder = "x".repeat(15.min(url.len()));
1844                    effective_line = effective_line.replacen(url, &placeholder, 1);
1845                }
1846            }
1847        }
1848
1849        self.calculate_string_length(&effective_line)
1850    }
1851}