Skip to main content

rumdl_lib/rules/md013_line_length/
mod.rs

//! Rule MD013: Line length
//!
//! See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
7use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
8use crate::utils::range_utils::LineIndex;
9use crate::utils::range_utils::calculate_excess_range;
10use crate::utils::regex_cache::{
11    IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
12};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::split_into_sentences;
15use toml;
16
17mod helpers;
18pub mod md013_config;
19use helpers::{
20    extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
21    split_into_segments, trim_preserving_hard_break,
22};
23pub use md013_config::MD013Config;
24use md013_config::{LengthMode, ReflowMode};
25
26#[cfg(test)]
27mod tests;
28use unicode_width::UnicodeWidthStr;
29
30#[derive(Clone, Default)]
31pub struct MD013LineLength {
32    pub(crate) config: MD013Config,
33}
34
35impl MD013LineLength {
36    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
37        Self {
38            config: MD013Config {
39                line_length: crate::types::LineLength::new(line_length),
40                code_blocks,
41                tables,
42                headings,
43                paragraphs: true, // Default to true for backwards compatibility
44                strict,
45                reflow: false,
46                reflow_mode: ReflowMode::default(),
47                length_mode: LengthMode::default(),
48                abbreviations: Vec::new(),
49            },
50        }
51    }
52
53    pub fn from_config_struct(config: MD013Config) -> Self {
54        Self { config }
55    }
56
57    fn should_ignore_line(
58        &self,
59        line: &str,
60        _lines: &[&str],
61        current_line: usize,
62        ctx: &crate::lint_context::LintContext,
63    ) -> bool {
64        if self.config.strict {
65            return false;
66        }
67
68        // Quick check for common patterns before expensive regex
69        let trimmed = line.trim();
70
71        // Only skip if the entire line is a URL (quick check first)
72        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
73            return true;
74        }
75
76        // Only skip if the entire line is an image reference (quick check first)
77        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
78            return true;
79        }
80
81        // Only skip if the entire line is a link reference (quick check first)
82        if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
83            return true;
84        }
85
86        // Code blocks with long strings (only check if in code block)
87        if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
88            && !trimmed.is_empty()
89            && !line.contains(' ')
90            && !line.contains('\t')
91        {
92            return true;
93        }
94
95        false
96    }
97
98    /// Check if rule should skip based on provided config (used for inline config support)
99    fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
100        // Skip if content is empty
101        if ctx.content.is_empty() {
102            return true;
103        }
104
105        // For sentence-per-line or normalize mode, never skip based on line length
106        if config.reflow
107            && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
108        {
109            return false;
110        }
111
112        // Quick check: if total content is shorter than line limit, definitely skip
113        if ctx.content.len() <= config.line_length.get() {
114            return true;
115        }
116
117        // Skip if no line exceeds the limit
118        !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
119    }
120}
121
122impl Rule for MD013LineLength {
123    fn name(&self) -> &'static str {
124        "MD013"
125    }
126
127    fn description(&self) -> &'static str {
128        "Line length should not be excessive"
129    }
130
131    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
132        let content = ctx.content;
133
134        // Parse inline configuration FIRST so we can use effective config for should_skip
135        let inline_config = crate::inline_config::InlineConfig::from_content(content);
136        let config_override = inline_config.get_rule_config("MD013");
137
138        // Apply configuration override if present
139        let effective_config = if let Some(json_config) = config_override {
140            if let Some(obj) = json_config.as_object() {
141                let mut config = self.config.clone();
142                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
143                    config.line_length = crate::types::LineLength::new(line_length as usize);
144                }
145                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
146                    config.code_blocks = code_blocks;
147                }
148                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
149                    config.tables = tables;
150                }
151                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
152                    config.headings = headings;
153                }
154                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
155                    config.strict = strict;
156                }
157                if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
158                    config.reflow = reflow;
159                }
160                if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
161                    config.reflow_mode = match reflow_mode {
162                        "default" => ReflowMode::Default,
163                        "normalize" => ReflowMode::Normalize,
164                        "sentence-per-line" => ReflowMode::SentencePerLine,
165                        _ => ReflowMode::default(),
166                    };
167                }
168                config
169            } else {
170                self.config.clone()
171            }
172        } else {
173            self.config.clone()
174        };
175
176        // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
177        // But don't skip if we're in reflow mode with Normalize or SentencePerLine
178        if self.should_skip_with_config(ctx, &effective_config)
179            && !(effective_config.reflow
180                && (effective_config.reflow_mode == ReflowMode::Normalize
181                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
182        {
183            return Ok(Vec::new());
184        }
185
186        // Direct implementation without DocumentStructure
187        let mut warnings = Vec::new();
188
189        // Special handling: line_length = 0 means "no line length limit"
190        // Skip all line length checks, but still allow reflow if enabled
191        let skip_length_checks = effective_config.line_length.is_unlimited();
192
193        // Pre-filter lines that could be problematic to avoid processing all lines
194        let mut candidate_lines = Vec::new();
195        if !skip_length_checks {
196            for (line_idx, line_info) in ctx.lines.iter().enumerate() {
197                // Skip front matter - it should never be linted
198                if line_info.in_front_matter {
199                    continue;
200                }
201
202                // Quick length check first
203                if line_info.byte_len > effective_config.line_length.get() {
204                    candidate_lines.push(line_idx);
205                }
206            }
207        }
208
209        // If no candidate lines and not in normalize or sentence-per-line mode, early return
210        if candidate_lines.is_empty()
211            && !(effective_config.reflow
212                && (effective_config.reflow_mode == ReflowMode::Normalize
213                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
214        {
215            return Ok(warnings);
216        }
217
218        // Use ctx.lines if available for better performance
219        let lines: Vec<&str> = if !ctx.lines.is_empty() {
220            ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
221        } else {
222            content.lines().collect()
223        };
224
225        // Create a quick lookup set for heading lines
226        // We need this for both the heading skip check AND the paragraphs check
227        let heading_lines_set: std::collections::HashSet<usize> = ctx
228            .lines
229            .iter()
230            .enumerate()
231            .filter(|(_, line)| line.heading.is_some())
232            .map(|(idx, _)| idx + 1)
233            .collect();
234
235        // Use pre-computed table blocks from context
236        // We need this for both the table skip check AND the paragraphs check
237        let table_blocks = &ctx.table_blocks;
238        let mut table_lines_set = std::collections::HashSet::new();
239        for table in table_blocks {
240            table_lines_set.insert(table.header_line + 1);
241            table_lines_set.insert(table.delimiter_line + 1);
242            for &line in &table.content_lines {
243                table_lines_set.insert(line + 1);
244            }
245        }
246
247        // Process candidate lines for line length checks
248        for &line_idx in &candidate_lines {
249            let line_number = line_idx + 1;
250            let line = lines[line_idx];
251
252            // Calculate effective length excluding unbreakable URLs
253            let effective_length = self.calculate_effective_length(line);
254
255            // Use single line length limit for all content
256            let line_limit = effective_config.line_length.get();
257
258            // Skip short lines immediately (double-check after effective length calculation)
259            if effective_length <= line_limit {
260                continue;
261            }
262
263            // Skip mkdocstrings blocks (already handled by LintContext)
264            if ctx.lines[line_idx].in_mkdocstrings {
265                continue;
266            }
267
268            // Skip various block types efficiently
269            if !effective_config.strict {
270                // Skip setext heading underlines
271                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
272                    continue;
273                }
274
275                // Skip block elements according to config flags
276                // The flags mean: true = check these elements, false = skip these elements
277                // So we skip when the flag is FALSE and the line is in that element type
278                if (!effective_config.headings && heading_lines_set.contains(&line_number))
279                    || (!effective_config.code_blocks
280                        && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
281                    || (!effective_config.tables && table_lines_set.contains(&line_number))
282                    || ctx.lines[line_number - 1].blockquote.is_some()
283                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
284                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
285                    || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
286                    || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
287                    || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
288                {
289                    continue;
290                }
291
292                // Check if this is a paragraph/regular text line
293                // If paragraphs = false, skip lines that are NOT in special blocks
294                if !effective_config.paragraphs {
295                    let is_special_block = heading_lines_set.contains(&line_number)
296                        || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
297                        || table_lines_set.contains(&line_number)
298                        || ctx.lines[line_number - 1].blockquote.is_some()
299                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
300                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
301                        || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
302                        || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
303                        || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment);
304
305                    // Skip regular paragraph text when paragraphs = false
306                    if !is_special_block {
307                        continue;
308                    }
309                }
310
311                // Skip lines that are only a URL, image ref, or link ref
312                if self.should_ignore_line(line, &lines, line_idx, ctx) {
313                    continue;
314                }
315            }
316
317            // In sentence-per-line mode, check if this is a single long sentence
318            // If so, emit a warning without a fix (user must manually rephrase)
319            if effective_config.reflow_mode == ReflowMode::SentencePerLine {
320                let sentences = split_into_sentences(line.trim());
321                if sentences.len() == 1 {
322                    // Single sentence that's too long - warn but don't auto-fix
323                    let message = format!("Line length {effective_length} exceeds {line_limit} characters");
324
325                    let (start_line, start_col, end_line, end_col) =
326                        calculate_excess_range(line_number, line, line_limit);
327
328                    warnings.push(LintWarning {
329                        rule_name: Some(self.name().to_string()),
330                        message,
331                        line: start_line,
332                        column: start_col,
333                        end_line,
334                        end_column: end_col,
335                        severity: Severity::Warning,
336                        fix: None, // No auto-fix for long single sentences
337                    });
338                    continue;
339                }
340                // Multiple sentences will be handled by paragraph-based reflow
341                continue;
342            }
343
344            // Don't provide fix for individual lines when reflow is enabled
345            // Paragraph-based fixes will be handled separately
346            let fix = None;
347
348            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
349
350            // Calculate precise character range for the excess portion
351            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
352
353            warnings.push(LintWarning {
354                rule_name: Some(self.name().to_string()),
355                message,
356                line: start_line,
357                column: start_col,
358                end_line,
359                end_column: end_col,
360                severity: Severity::Warning,
361                fix,
362            });
363        }
364
365        // If reflow is enabled, generate paragraph-based fixes
366        if effective_config.reflow {
367            let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
368            // Merge paragraph warnings with line warnings, removing duplicates
369            for pw in paragraph_warnings {
370                // Remove any line warnings that overlap with this paragraph
371                warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
372                warnings.push(pw);
373            }
374        }
375
376        Ok(warnings)
377    }
378
379    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
380        // For CLI usage, apply fixes from warnings
381        // LSP will use the warning-based fixes directly
382        let warnings = self.check(ctx)?;
383
384        // If there are no fixes, return content unchanged
385        if !warnings.iter().any(|w| w.fix.is_some()) {
386            return Ok(ctx.content.to_string());
387        }
388
389        // Apply warning-based fixes
390        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
391            .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
392    }
393
394    fn as_any(&self) -> &dyn std::any::Any {
395        self
396    }
397
398    fn category(&self) -> RuleCategory {
399        RuleCategory::Whitespace
400    }
401
402    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
403        self.should_skip_with_config(ctx, &self.config)
404    }
405
406    fn default_config_section(&self) -> Option<(String, toml::Value)> {
407        let default_config = MD013Config::default();
408        let json_value = serde_json::to_value(&default_config).ok()?;
409        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
410
411        if let toml::Value::Table(table) = toml_value {
412            if !table.is_empty() {
413                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
414            } else {
415                None
416            }
417        } else {
418            None
419        }
420    }
421
422    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
423        let mut aliases = std::collections::HashMap::new();
424        aliases.insert("enable_reflow".to_string(), "reflow".to_string());
425        Some(aliases)
426    }
427
428    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
429    where
430        Self: Sized,
431    {
432        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
433        // Use global line_length if rule-specific config still has default value
434        if rule_config.line_length.get() == 80 {
435            rule_config.line_length = config.global.line_length;
436        }
437        Box::new(Self::from_config_struct(rule_config))
438    }
439}
440
441impl MD013LineLength {
442    /// Generate paragraph-based fixes
443    fn generate_paragraph_fixes(
444        &self,
445        ctx: &crate::lint_context::LintContext,
446        config: &MD013Config,
447        lines: &[&str],
448    ) -> Vec<LintWarning> {
449        let mut warnings = Vec::new();
450        let line_index = LineIndex::new(ctx.content);
451
452        let mut i = 0;
453        while i < lines.len() {
454            let line_num = i + 1;
455
456            // Skip special structures
457            let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
458                info.in_code_block
459                    || info.in_front_matter
460                    || info.in_html_block
461                    || info.in_html_comment
462                    || info.in_esm_block
463                    || info.in_jsx_expression
464                    || info.in_mdx_comment
465            });
466
467            if should_skip_due_to_line_info
468                || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
469                || lines[i].trim().starts_with('#')
470                || TableUtils::is_potential_table_row(lines[i])
471                || lines[i].trim().is_empty()
472                || is_horizontal_rule(lines[i].trim())
473                || is_template_directive_only(lines[i])
474            {
475                i += 1;
476                continue;
477            }
478
479            // Helper function to detect semantic line markers
480            let is_semantic_line = |content: &str| -> bool {
481                let trimmed = content.trim_start();
482                let semantic_markers = [
483                    "NOTE:",
484                    "WARNING:",
485                    "IMPORTANT:",
486                    "CAUTION:",
487                    "TIP:",
488                    "DANGER:",
489                    "HINT:",
490                    "INFO:",
491                ];
492                semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
493            };
494
495            // Helper function to detect fence markers (opening or closing)
496            let is_fence_marker = |content: &str| -> bool {
497                let trimmed = content.trim_start();
498                trimmed.starts_with("```") || trimmed.starts_with("~~~")
499            };
500
501            // Check if this is a list item - handle it specially
502            let trimmed = lines[i].trim();
503            if is_list_item(trimmed) {
504                // Collect the entire list item including continuation lines
505                let list_start = i;
506                let (marker, first_content) = extract_list_marker_and_content(lines[i]);
507                let marker_len = marker.len();
508
509                // Track lines and their types (content, code block, fence, nested list)
510                #[derive(Clone)]
511                enum LineType {
512                    Content(String),
513                    CodeBlock(String, usize),      // content and original indent
514                    NestedListItem(String, usize), // full line content and original indent
515                    SemanticLine(String),          // Lines starting with NOTE:, WARNING:, etc that should stay separate
516                    SnippetLine(String),           // MkDocs Snippets delimiters (-8<-) that must stay on their own line
517                    Empty,
518                }
519
520                let mut actual_indent: Option<usize> = None;
521                let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
522                i += 1;
523
524                // Collect continuation lines using ctx.lines for metadata
525                while i < lines.len() {
526                    let line_info = &ctx.lines[i];
527
528                    // Use pre-computed is_blank from ctx
529                    if line_info.is_blank {
530                        // Empty line - check if next line is indented (part of list item)
531                        if i + 1 < lines.len() {
532                            let next_info = &ctx.lines[i + 1];
533
534                            // Check if next line is indented enough to be continuation
535                            if !next_info.is_blank && next_info.indent >= marker_len {
536                                // This blank line is between paragraphs/blocks in the list item
537                                list_item_lines.push(LineType::Empty);
538                                i += 1;
539                                continue;
540                            }
541                        }
542                        // No indented line after blank, end of list item
543                        break;
544                    }
545
546                    // Use pre-computed indent from ctx
547                    let indent = line_info.indent;
548
549                    // Valid continuation must be indented at least marker_len
550                    if indent >= marker_len {
551                        let trimmed = line_info.content(ctx.content).trim();
552
553                        // Use pre-computed in_code_block from ctx
554                        if line_info.in_code_block {
555                            list_item_lines.push(LineType::CodeBlock(
556                                line_info.content(ctx.content)[indent..].to_string(),
557                                indent,
558                            ));
559                            i += 1;
560                            continue;
561                        }
562
563                        // Check if this is a SIBLING list item (breaks parent)
564                        // Nested lists are indented >= marker_len and are PART of the parent item
565                        // Siblings are at indent < marker_len (at or before parent marker)
566                        if is_list_item(trimmed) && indent < marker_len {
567                            // This is a sibling item at same or higher level - end parent item
568                            break;
569                        }
570
571                        // Check if this is a NESTED list item marker
572                        // Nested lists should be processed separately UNLESS they're part of a
573                        // multi-paragraph list item (indicated by a blank line before them OR
574                        // it's a continuation of an already-started nested list)
575                        if is_list_item(trimmed) && indent >= marker_len {
576                            // Check if there was a blank line before this (multi-paragraph context)
577                            let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
578
579                            // Check if we've already seen nested list content (another nested item)
580                            let has_nested_content = list_item_lines.iter().any(|line| {
581                                matches!(line, LineType::Content(c) if is_list_item(c.trim()))
582                                    || matches!(line, LineType::NestedListItem(_, _))
583                            });
584
585                            if !has_blank_before && !has_nested_content {
586                                // Single-paragraph context with no prior nested items: starts a new item
587                                // End parent collection; nested list will be processed next
588                                break;
589                            }
590                            // else: multi-paragraph context or continuation of nested list, keep collecting
591                            // Mark this as a nested list item to preserve its structure
592                            list_item_lines.push(LineType::NestedListItem(
593                                line_info.content(ctx.content)[indent..].to_string(),
594                                indent,
595                            ));
596                            i += 1;
597                            continue;
598                        }
599
600                        // Normal continuation: marker_len to marker_len+3
601                        if indent <= marker_len + 3 {
602                            // Set actual_indent from first non-code continuation if not set
603                            if actual_indent.is_none() {
604                                actual_indent = Some(indent);
605                            }
606
607                            // Extract content (remove indentation and trailing whitespace)
608                            // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
609                            // See: https://github.com/rvben/rumdl/issues/76
610                            let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
611
612                            // Check if this is a fence marker (opening or closing)
613                            // These should be treated as code block lines, not paragraph content
614                            if is_fence_marker(&content) {
615                                list_item_lines.push(LineType::CodeBlock(content, indent));
616                            }
617                            // Check if this is a semantic line (NOTE:, WARNING:, etc.)
618                            else if is_semantic_line(&content) {
619                                list_item_lines.push(LineType::SemanticLine(content));
620                            }
621                            // Check if this is a snippet block delimiter (-8<- or --8<--)
622                            // These must be preserved on their own lines for MkDocs Snippets extension
623                            else if is_snippet_block_delimiter(&content) {
624                                list_item_lines.push(LineType::SnippetLine(content));
625                            } else {
626                                list_item_lines.push(LineType::Content(content));
627                            }
628                            i += 1;
629                        } else {
630                            // indent >= marker_len + 4: indented code block
631                            list_item_lines.push(LineType::CodeBlock(
632                                line_info.content(ctx.content)[indent..].to_string(),
633                                indent,
634                            ));
635                            i += 1;
636                        }
637                    } else {
638                        // Not indented enough, end of list item
639                        break;
640                    }
641                }
642
643                // Use detected indent or fallback to marker length
644                let indent_size = actual_indent.unwrap_or(marker_len);
645                let expected_indent = " ".repeat(indent_size);
646
647                // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
648                #[derive(Clone)]
649                enum Block {
650                    Paragraph(Vec<String>),
651                    Code {
652                        lines: Vec<(String, usize)>, // (content, indent) pairs
653                        has_preceding_blank: bool,   // Whether there was a blank line before this block
654                    },
655                    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
656                    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
657                    SnippetLine(String),  // MkDocs Snippets delimiter that stays on its own line without extra spacing
658                    Html {
659                        lines: Vec<String>,        // HTML content preserved exactly as-is
660                        has_preceding_blank: bool, // Whether there was a blank line before this block
661                    },
662                }
663
664                // HTML tag detection helpers
665                // Block-level HTML tags that should trigger HTML block detection
666                const BLOCK_LEVEL_TAGS: &[&str] = &[
667                    "div",
668                    "details",
669                    "summary",
670                    "section",
671                    "article",
672                    "header",
673                    "footer",
674                    "nav",
675                    "aside",
676                    "main",
677                    "table",
678                    "thead",
679                    "tbody",
680                    "tfoot",
681                    "tr",
682                    "td",
683                    "th",
684                    "ul",
685                    "ol",
686                    "li",
687                    "dl",
688                    "dt",
689                    "dd",
690                    "pre",
691                    "blockquote",
692                    "figure",
693                    "figcaption",
694                    "form",
695                    "fieldset",
696                    "legend",
697                    "hr",
698                    "p",
699                    "h1",
700                    "h2",
701                    "h3",
702                    "h4",
703                    "h5",
704                    "h6",
705                    "style",
706                    "script",
707                    "noscript",
708                ];
709
710                fn is_block_html_opening_tag(line: &str) -> Option<String> {
711                    let trimmed = line.trim();
712
713                    // Check for HTML comments
714                    if trimmed.starts_with("<!--") {
715                        return Some("!--".to_string());
716                    }
717
718                    // Check for opening tags
719                    if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
720                        // Extract tag name from <tagname ...> or <tagname>
721                        let after_bracket = &trimmed[1..];
722                        if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
723                            let tag_name = after_bracket[..end].to_lowercase();
724
725                            // Only treat as block if it's a known block-level tag
726                            if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
727                                return Some(tag_name);
728                            }
729                        }
730                    }
731                    None
732                }
733
734                fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
735                    let trimmed = line.trim();
736
737                    // Special handling for HTML comments
738                    if tag_name == "!--" {
739                        return trimmed.ends_with("-->");
740                    }
741
742                    // Check for closing tags: </tagname> or </tagname ...>
743                    trimmed.starts_with(&format!("</{tag_name}>"))
744                        || trimmed.starts_with(&format!("</{tag_name}  "))
745                        || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
746                }
747
748                fn is_self_closing_tag(line: &str) -> bool {
749                    let trimmed = line.trim();
750                    trimmed.ends_with("/>")
751                }
752
753                let mut blocks: Vec<Block> = Vec::new();
754                let mut current_paragraph: Vec<String> = Vec::new();
755                let mut current_code_block: Vec<(String, usize)> = Vec::new();
756                let mut current_nested_list: Vec<(String, usize)> = Vec::new();
757                let mut current_html_block: Vec<String> = Vec::new();
758                let mut html_tag_stack: Vec<String> = Vec::new();
759                let mut in_code = false;
760                let mut in_nested_list = false;
761                let mut in_html_block = false;
762                let mut had_preceding_blank = false; // Track if we just saw an empty line
763                let mut code_block_has_preceding_blank = false; // Track blank before current code block
764                let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
765
766                for line in &list_item_lines {
767                    match line {
768                        LineType::Empty => {
769                            if in_code {
770                                current_code_block.push((String::new(), 0));
771                            } else if in_nested_list {
772                                current_nested_list.push((String::new(), 0));
773                            } else if in_html_block {
774                                // Allow blank lines inside HTML blocks
775                                current_html_block.push(String::new());
776                            } else if !current_paragraph.is_empty() {
777                                blocks.push(Block::Paragraph(current_paragraph.clone()));
778                                current_paragraph.clear();
779                            }
780                            // Mark that we saw a blank line
781                            had_preceding_blank = true;
782                        }
783                        LineType::Content(content) => {
784                            // Check if we're currently in an HTML block
785                            if in_html_block {
786                                current_html_block.push(content.clone());
787
788                                // Check if this line closes any open HTML tags
789                                if let Some(last_tag) = html_tag_stack.last() {
790                                    if is_html_closing_tag(content, last_tag) {
791                                        html_tag_stack.pop();
792
793                                        // If stack is empty, HTML block is complete
794                                        if html_tag_stack.is_empty() {
795                                            blocks.push(Block::Html {
796                                                lines: current_html_block.clone(),
797                                                has_preceding_blank: html_block_has_preceding_blank,
798                                            });
799                                            current_html_block.clear();
800                                            in_html_block = false;
801                                        }
802                                    } else if let Some(new_tag) = is_block_html_opening_tag(content) {
803                                        // Nested opening tag within HTML block
804                                        if !is_self_closing_tag(content) {
805                                            html_tag_stack.push(new_tag);
806                                        }
807                                    }
808                                }
809                                had_preceding_blank = false;
810                            } else {
811                                // Not in HTML block - check if this line starts one
812                                if let Some(tag_name) = is_block_html_opening_tag(content) {
813                                    // Flush current paragraph before starting HTML block
814                                    if in_code {
815                                        blocks.push(Block::Code {
816                                            lines: current_code_block.clone(),
817                                            has_preceding_blank: code_block_has_preceding_blank,
818                                        });
819                                        current_code_block.clear();
820                                        in_code = false;
821                                    } else if in_nested_list {
822                                        blocks.push(Block::NestedList(current_nested_list.clone()));
823                                        current_nested_list.clear();
824                                        in_nested_list = false;
825                                    } else if !current_paragraph.is_empty() {
826                                        blocks.push(Block::Paragraph(current_paragraph.clone()));
827                                        current_paragraph.clear();
828                                    }
829
830                                    // Start new HTML block
831                                    in_html_block = true;
832                                    html_block_has_preceding_blank = had_preceding_blank;
833                                    current_html_block.push(content.clone());
834
835                                    // Check if it's self-closing or needs a closing tag
836                                    if is_self_closing_tag(content) {
837                                        // Self-closing tag - complete the HTML block immediately
838                                        blocks.push(Block::Html {
839                                            lines: current_html_block.clone(),
840                                            has_preceding_blank: html_block_has_preceding_blank,
841                                        });
842                                        current_html_block.clear();
843                                        in_html_block = false;
844                                    } else {
845                                        // Regular opening tag - push to stack
846                                        html_tag_stack.push(tag_name);
847                                    }
848                                } else {
849                                    // Regular content line - add to paragraph
850                                    if in_code {
851                                        // Switching from code to content
852                                        blocks.push(Block::Code {
853                                            lines: current_code_block.clone(),
854                                            has_preceding_blank: code_block_has_preceding_blank,
855                                        });
856                                        current_code_block.clear();
857                                        in_code = false;
858                                    } else if in_nested_list {
859                                        // Switching from nested list to content
860                                        blocks.push(Block::NestedList(current_nested_list.clone()));
861                                        current_nested_list.clear();
862                                        in_nested_list = false;
863                                    }
864                                    current_paragraph.push(content.clone());
865                                }
866                                had_preceding_blank = false; // Reset after content
867                            }
868                        }
869                        LineType::CodeBlock(content, indent) => {
870                            if in_nested_list {
871                                // Switching from nested list to code
872                                blocks.push(Block::NestedList(current_nested_list.clone()));
873                                current_nested_list.clear();
874                                in_nested_list = false;
875                            } else if in_html_block {
876                                // Switching from HTML block to code (shouldn't happen normally, but handle it)
877                                blocks.push(Block::Html {
878                                    lines: current_html_block.clone(),
879                                    has_preceding_blank: html_block_has_preceding_blank,
880                                });
881                                current_html_block.clear();
882                                html_tag_stack.clear();
883                                in_html_block = false;
884                            }
885                            if !in_code {
886                                // Switching from content to code
887                                if !current_paragraph.is_empty() {
888                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
889                                    current_paragraph.clear();
890                                }
891                                in_code = true;
892                                // Record whether there was a blank line before this code block
893                                code_block_has_preceding_blank = had_preceding_blank;
894                            }
895                            current_code_block.push((content.clone(), *indent));
896                            had_preceding_blank = false; // Reset after code
897                        }
898                        LineType::NestedListItem(content, indent) => {
899                            if in_code {
900                                // Switching from code to nested list
901                                blocks.push(Block::Code {
902                                    lines: current_code_block.clone(),
903                                    has_preceding_blank: code_block_has_preceding_blank,
904                                });
905                                current_code_block.clear();
906                                in_code = false;
907                            } else if in_html_block {
908                                // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
909                                blocks.push(Block::Html {
910                                    lines: current_html_block.clone(),
911                                    has_preceding_blank: html_block_has_preceding_blank,
912                                });
913                                current_html_block.clear();
914                                html_tag_stack.clear();
915                                in_html_block = false;
916                            }
917                            if !in_nested_list {
918                                // Switching from content to nested list
919                                if !current_paragraph.is_empty() {
920                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
921                                    current_paragraph.clear();
922                                }
923                                in_nested_list = true;
924                            }
925                            current_nested_list.push((content.clone(), *indent));
926                            had_preceding_blank = false; // Reset after nested list
927                        }
928                        LineType::SemanticLine(content) => {
929                            // Semantic lines are standalone - flush any current block and add as separate block
930                            if in_code {
931                                blocks.push(Block::Code {
932                                    lines: current_code_block.clone(),
933                                    has_preceding_blank: code_block_has_preceding_blank,
934                                });
935                                current_code_block.clear();
936                                in_code = false;
937                            } else if in_nested_list {
938                                blocks.push(Block::NestedList(current_nested_list.clone()));
939                                current_nested_list.clear();
940                                in_nested_list = false;
941                            } else if in_html_block {
942                                blocks.push(Block::Html {
943                                    lines: current_html_block.clone(),
944                                    has_preceding_blank: html_block_has_preceding_blank,
945                                });
946                                current_html_block.clear();
947                                html_tag_stack.clear();
948                                in_html_block = false;
949                            } else if !current_paragraph.is_empty() {
950                                blocks.push(Block::Paragraph(current_paragraph.clone()));
951                                current_paragraph.clear();
952                            }
953                            // Add semantic line as its own block
954                            blocks.push(Block::SemanticLine(content.clone()));
955                            had_preceding_blank = false; // Reset after semantic line
956                        }
957                        LineType::SnippetLine(content) => {
958                            // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
959                            // Unlike semantic lines, snippet lines don't add extra blank lines around them
960                            if in_code {
961                                blocks.push(Block::Code {
962                                    lines: current_code_block.clone(),
963                                    has_preceding_blank: code_block_has_preceding_blank,
964                                });
965                                current_code_block.clear();
966                                in_code = false;
967                            } else if in_nested_list {
968                                blocks.push(Block::NestedList(current_nested_list.clone()));
969                                current_nested_list.clear();
970                                in_nested_list = false;
971                            } else if in_html_block {
972                                blocks.push(Block::Html {
973                                    lines: current_html_block.clone(),
974                                    has_preceding_blank: html_block_has_preceding_blank,
975                                });
976                                current_html_block.clear();
977                                html_tag_stack.clear();
978                                in_html_block = false;
979                            } else if !current_paragraph.is_empty() {
980                                blocks.push(Block::Paragraph(current_paragraph.clone()));
981                                current_paragraph.clear();
982                            }
983                            // Add snippet line as its own block
984                            blocks.push(Block::SnippetLine(content.clone()));
985                            had_preceding_blank = false;
986                        }
987                    }
988                }
989
990                // Push remaining block
991                if in_code && !current_code_block.is_empty() {
992                    blocks.push(Block::Code {
993                        lines: current_code_block,
994                        has_preceding_blank: code_block_has_preceding_blank,
995                    });
996                } else if in_nested_list && !current_nested_list.is_empty() {
997                    blocks.push(Block::NestedList(current_nested_list));
998                } else if in_html_block && !current_html_block.is_empty() {
999                    // If we still have an unclosed HTML block, push it anyway
1000                    // (malformed HTML - missing closing tag)
1001                    blocks.push(Block::Html {
1002                        lines: current_html_block,
1003                        has_preceding_blank: html_block_has_preceding_blank,
1004                    });
1005                } else if !current_paragraph.is_empty() {
1006                    blocks.push(Block::Paragraph(current_paragraph));
1007                }
1008
1009                // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1010                let content_lines: Vec<String> = list_item_lines
1011                    .iter()
1012                    .filter_map(|line| {
1013                        if let LineType::Content(s) = line {
1014                            Some(s.clone())
1015                        } else {
1016                            None
1017                        }
1018                    })
1019                    .collect();
1020
1021                // Check if we need to reflow this list item
1022                // We check the combined content to see if it exceeds length limits
1023                let combined_content = content_lines.join(" ").trim().to_string();
1024                let full_line = format!("{marker}{combined_content}");
1025
1026                // Helper to check if we should reflow in normalize mode
1027                let should_normalize = || {
1028                    // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1029                    // DO normalize if it has plain text content that spans multiple lines
1030                    let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1031                    let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1032                    let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1033                    let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1034                    let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1035
1036                    // If we have nested lists, code blocks, semantic lines, or snippet lines but no paragraphs, don't normalize
1037                    if (has_nested_lists || has_code_blocks || has_semantic_lines || has_snippet_lines)
1038                        && !has_paragraphs
1039                    {
1040                        return false;
1041                    }
1042
1043                    // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1044                    if has_paragraphs {
1045                        let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1046                        if paragraph_count > 1 {
1047                            // Multiple paragraph blocks should be normalized
1048                            return true;
1049                        }
1050
1051                        // Single paragraph block: normalize if it has multiple content lines
1052                        if content_lines.len() > 1 {
1053                            return true;
1054                        }
1055                    }
1056
1057                    false
1058                };
1059
1060                let needs_reflow = match config.reflow_mode {
1061                    ReflowMode::Normalize => {
1062                        // Only reflow if:
1063                        // 1. The combined line would exceed the limit, OR
1064                        // 2. The list item should be normalized (has multi-line plain text)
1065                        let combined_length = self.calculate_effective_length(&full_line);
1066                        if combined_length > config.line_length.get() {
1067                            true
1068                        } else {
1069                            should_normalize()
1070                        }
1071                    }
1072                    ReflowMode::SentencePerLine => {
1073                        // Check if list item has multiple sentences
1074                        let sentences = split_into_sentences(&combined_content);
1075                        sentences.len() > 1
1076                    }
1077                    ReflowMode::Default => {
1078                        // In default mode, only reflow if any individual line exceeds limit
1079                        // Check the original lines, not the combined content
1080                        (list_start..i)
1081                            .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1082                    }
1083                };
1084
1085                if needs_reflow {
1086                    let start_range = line_index.whole_line_range(list_start + 1);
1087                    let end_line = i - 1;
1088                    let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1089                        line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1090                    } else {
1091                        line_index.whole_line_range(end_line + 1)
1092                    };
1093                    let byte_range = start_range.start..end_range.end;
1094
1095                    // Reflow each block (paragraphs only, preserve code blocks)
1096                    // When line_length = 0 (no limit), use a very large value for reflow
1097                    let reflow_line_length = if config.line_length.is_unlimited() {
1098                        usize::MAX
1099                    } else {
1100                        config.line_length.get().saturating_sub(indent_size).max(1)
1101                    };
1102                    let reflow_options = crate::utils::text_reflow::ReflowOptions {
1103                        line_length: reflow_line_length,
1104                        break_on_sentences: true,
1105                        preserve_breaks: false,
1106                        sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1107                        abbreviations: config.abbreviations_for_reflow(),
1108                    };
1109
1110                    let mut result: Vec<String> = Vec::new();
1111                    let mut is_first_block = true;
1112
1113                    for (block_idx, block) in blocks.iter().enumerate() {
1114                        match block {
1115                            Block::Paragraph(para_lines) => {
1116                                // Split the paragraph into segments at hard break boundaries
1117                                // Each segment can be reflowed independently
1118                                let segments = split_into_segments(para_lines);
1119
1120                                for (segment_idx, segment) in segments.iter().enumerate() {
1121                                    // Check if this segment ends with a hard break and what type
1122                                    let hard_break_type = segment.last().and_then(|line| {
1123                                        let line = line.strip_suffix('\r').unwrap_or(line);
1124                                        if line.ends_with('\\') {
1125                                            Some("\\")
1126                                        } else if line.ends_with("  ") {
1127                                            Some("  ")
1128                                        } else {
1129                                            None
1130                                        }
1131                                    });
1132
1133                                    // Join and reflow the segment (removing the hard break marker for processing)
1134                                    let segment_for_reflow: Vec<String> = segment
1135                                        .iter()
1136                                        .map(|line| {
1137                                            // Strip hard break marker (2 spaces or backslash) for reflow processing
1138                                            if line.ends_with('\\') {
1139                                                line[..line.len() - 1].trim_end().to_string()
1140                                            } else if line.ends_with("  ") {
1141                                                line[..line.len() - 2].trim_end().to_string()
1142                                            } else {
1143                                                line.clone()
1144                                            }
1145                                        })
1146                                        .collect();
1147
1148                                    let segment_text = segment_for_reflow.join(" ").trim().to_string();
1149                                    if !segment_text.is_empty() {
1150                                        let reflowed =
1151                                            crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1152
1153                                        if is_first_block && segment_idx == 0 {
1154                                            // First segment of first block starts with marker
1155                                            result.push(format!("{marker}{}", reflowed[0]));
1156                                            for line in reflowed.iter().skip(1) {
1157                                                result.push(format!("{expected_indent}{line}"));
1158                                            }
1159                                            is_first_block = false;
1160                                        } else {
1161                                            // Subsequent segments
1162                                            for line in reflowed {
1163                                                result.push(format!("{expected_indent}{line}"));
1164                                            }
1165                                        }
1166
1167                                        // If this segment had a hard break, add it back to the last line
1168                                        // Preserve the original hard break format (backslash or two spaces)
1169                                        if let Some(break_marker) = hard_break_type
1170                                            && let Some(last_line) = result.last_mut()
1171                                        {
1172                                            last_line.push_str(break_marker);
1173                                        }
1174                                    }
1175                                }
1176
1177                                // Add blank line after paragraph block if there's a next block
1178                                // BUT: check if next block is a code block that doesn't want a preceding blank
1179                                // Also don't add blank lines before snippet lines (they should stay tight)
1180                                if block_idx < blocks.len() - 1 {
1181                                    let next_block = &blocks[block_idx + 1];
1182                                    let should_add_blank = match next_block {
1183                                        Block::Code {
1184                                            has_preceding_blank, ..
1185                                        } => *has_preceding_blank,
1186                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1187                                        _ => true,                      // For all other blocks, add blank line
1188                                    };
1189                                    if should_add_blank {
1190                                        result.push(String::new());
1191                                    }
1192                                }
1193                            }
1194                            Block::Code {
1195                                lines: code_lines,
1196                                has_preceding_blank: _,
1197                            } => {
1198                                // Preserve code blocks as-is with original indentation
1199                                // NOTE: Blank line before code block is handled by the previous block
1200                                // (see paragraph block's logic above)
1201
1202                                for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1203                                    if is_first_block && idx == 0 {
1204                                        // First line of first block gets marker
1205                                        result.push(format!(
1206                                            "{marker}{}",
1207                                            " ".repeat(orig_indent - marker_len) + content
1208                                        ));
1209                                        is_first_block = false;
1210                                    } else if content.is_empty() {
1211                                        result.push(String::new());
1212                                    } else {
1213                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1214                                    }
1215                                }
1216                            }
1217                            Block::NestedList(nested_items) => {
1218                                // Preserve nested list items as-is with original indentation
1219                                if !is_first_block {
1220                                    result.push(String::new());
1221                                }
1222
1223                                for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1224                                    if is_first_block && idx == 0 {
1225                                        // First line of first block gets marker
1226                                        result.push(format!(
1227                                            "{marker}{}",
1228                                            " ".repeat(orig_indent - marker_len) + content
1229                                        ));
1230                                        is_first_block = false;
1231                                    } else if content.is_empty() {
1232                                        result.push(String::new());
1233                                    } else {
1234                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1235                                    }
1236                                }
1237
1238                                // Add blank line after nested list if there's a next block
1239                                // Check if next block is a code block that doesn't want a preceding blank
1240                                if block_idx < blocks.len() - 1 {
1241                                    let next_block = &blocks[block_idx + 1];
1242                                    let should_add_blank = match next_block {
1243                                        Block::Code {
1244                                            has_preceding_blank, ..
1245                                        } => *has_preceding_blank,
1246                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1247                                        _ => true,                      // For all other blocks, add blank line
1248                                    };
1249                                    if should_add_blank {
1250                                        result.push(String::new());
1251                                    }
1252                                }
1253                            }
1254                            Block::SemanticLine(content) => {
1255                                // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1256                                // Add blank line before if not first block
1257                                if !is_first_block {
1258                                    result.push(String::new());
1259                                }
1260
1261                                if is_first_block {
1262                                    // First block starts with marker
1263                                    result.push(format!("{marker}{content}"));
1264                                    is_first_block = false;
1265                                } else {
1266                                    // Subsequent blocks use expected indent
1267                                    result.push(format!("{expected_indent}{content}"));
1268                                }
1269
1270                                // Add blank line after semantic line if there's a next block
1271                                // Check if next block is a code block that doesn't want a preceding blank
1272                                if block_idx < blocks.len() - 1 {
1273                                    let next_block = &blocks[block_idx + 1];
1274                                    let should_add_blank = match next_block {
1275                                        Block::Code {
1276                                            has_preceding_blank, ..
1277                                        } => *has_preceding_blank,
1278                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1279                                        _ => true,                      // For all other blocks, add blank line
1280                                    };
1281                                    if should_add_blank {
1282                                        result.push(String::new());
1283                                    }
1284                                }
1285                            }
1286                            Block::SnippetLine(content) => {
1287                                // Preserve snippet delimiters (-8<-) as-is on their own line
1288                                // Unlike semantic lines, snippet lines don't add extra blank lines
1289                                if is_first_block {
1290                                    // First block starts with marker
1291                                    result.push(format!("{marker}{content}"));
1292                                    is_first_block = false;
1293                                } else {
1294                                    // Subsequent blocks use expected indent
1295                                    result.push(format!("{expected_indent}{content}"));
1296                                }
1297                                // No blank lines added before or after snippet delimiters
1298                            }
1299                            Block::Html {
1300                                lines: html_lines,
1301                                has_preceding_blank: _,
1302                            } => {
1303                                // Preserve HTML blocks exactly as-is with original indentation
1304                                // NOTE: Blank line before HTML block is handled by the previous block
1305
1306                                for (idx, line) in html_lines.iter().enumerate() {
1307                                    if is_first_block && idx == 0 {
1308                                        // First line of first block gets marker
1309                                        result.push(format!("{marker}{line}"));
1310                                        is_first_block = false;
1311                                    } else if line.is_empty() {
1312                                        // Preserve blank lines inside HTML blocks
1313                                        result.push(String::new());
1314                                    } else {
1315                                        // Preserve lines with their original content (already includes indentation)
1316                                        result.push(format!("{expected_indent}{line}"));
1317                                    }
1318                                }
1319
1320                                // Add blank line after HTML block if there's a next block
1321                                if block_idx < blocks.len() - 1 {
1322                                    let next_block = &blocks[block_idx + 1];
1323                                    let should_add_blank = match next_block {
1324                                        Block::Code {
1325                                            has_preceding_blank, ..
1326                                        } => *has_preceding_blank,
1327                                        Block::Html {
1328                                            has_preceding_blank, ..
1329                                        } => *has_preceding_blank,
1330                                        Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1331                                        _ => true,                      // For all other blocks, add blank line
1332                                    };
1333                                    if should_add_blank {
1334                                        result.push(String::new());
1335                                    }
1336                                }
1337                            }
1338                        }
1339                    }
1340
1341                    let reflowed_text = result.join("\n");
1342
1343                    // Preserve trailing newline
1344                    let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1345                        format!("{reflowed_text}\n")
1346                    } else {
1347                        reflowed_text
1348                    };
1349
1350                    // Get the original text to compare
1351                    let original_text = &ctx.content[byte_range.clone()];
1352
1353                    // Only generate a warning if the replacement is different from the original
1354                    if original_text != replacement {
1355                        // Generate an appropriate message based on why reflow is needed
1356                        let message = match config.reflow_mode {
1357                            ReflowMode::SentencePerLine => {
1358                                let num_sentences = split_into_sentences(&combined_content).len();
1359                                let num_lines = content_lines.len();
1360                                if num_lines == 1 {
1361                                    // Single line with multiple sentences
1362                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1363                                } else {
1364                                    // Multiple lines - could be split sentences or mixed
1365                                    format!(
1366                                        "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1367                                    )
1368                                }
1369                            }
1370                            ReflowMode::Normalize => {
1371                                let combined_length = self.calculate_effective_length(&full_line);
1372                                if combined_length > config.line_length.get() {
1373                                    format!(
1374                                        "Line length {} exceeds {} characters",
1375                                        combined_length,
1376                                        config.line_length.get()
1377                                    )
1378                                } else {
1379                                    "Multi-line content can be normalized".to_string()
1380                                }
1381                            }
1382                            ReflowMode::Default => {
1383                                let combined_length = self.calculate_effective_length(&full_line);
1384                                format!(
1385                                    "Line length {} exceeds {} characters",
1386                                    combined_length,
1387                                    config.line_length.get()
1388                                )
1389                            }
1390                        };
1391
1392                        warnings.push(LintWarning {
1393                            rule_name: Some(self.name().to_string()),
1394                            message,
1395                            line: list_start + 1,
1396                            column: 1,
1397                            end_line: end_line + 1,
1398                            end_column: lines[end_line].len() + 1,
1399                            severity: Severity::Warning,
1400                            fix: Some(crate::rule::Fix {
1401                                range: byte_range,
1402                                replacement,
1403                            }),
1404                        });
1405                    }
1406                }
1407                continue;
1408            }
1409
1410            // Found start of a paragraph - collect all lines in it
1411            let paragraph_start = i;
1412            let mut paragraph_lines = vec![lines[i]];
1413            i += 1;
1414
1415            while i < lines.len() {
1416                let next_line = lines[i];
1417                let next_line_num = i + 1;
1418                let next_trimmed = next_line.trim();
1419
1420                // Stop at paragraph boundaries
1421                if next_trimmed.is_empty()
1422                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1423                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1424                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1425                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1426                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1427                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1428                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1429                    || (next_line_num > 0
1430                        && next_line_num <= ctx.lines.len()
1431                        && ctx.lines[next_line_num - 1].blockquote.is_some())
1432                    || next_trimmed.starts_with('#')
1433                    || TableUtils::is_potential_table_row(next_line)
1434                    || is_list_item(next_trimmed)
1435                    || is_horizontal_rule(next_trimmed)
1436                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1437                    || is_template_directive_only(next_line)
1438                    || is_standalone_attr_list(next_line)
1439                    || is_snippet_block_delimiter(next_line)
1440                {
1441                    break;
1442                }
1443
1444                // Check if the previous line ends with a hard break (2+ spaces or backslash)
1445                if i > 0 && has_hard_break(lines[i - 1]) {
1446                    // Don't include lines after hard breaks in the same paragraph
1447                    break;
1448                }
1449
1450                paragraph_lines.push(next_line);
1451                i += 1;
1452            }
1453
1454            // Combine paragraph lines into a single string for processing
1455            // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1456            let paragraph_text = paragraph_lines.join(" ");
1457
1458            // Skip reflowing if this paragraph contains definition list items
1459            // Definition lists are multi-line structures that should not be joined
1460            let contains_definition_list = paragraph_lines
1461                .iter()
1462                .any(|line| crate::utils::is_definition_list_item(line));
1463
1464            if contains_definition_list {
1465                // Don't reflow definition lists - skip this paragraph
1466                i = paragraph_start + paragraph_lines.len();
1467                continue;
1468            }
1469
1470            // Skip reflowing if this paragraph contains MkDocs Snippets markers
1471            // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1472            let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1473
1474            if contains_snippets {
1475                // Don't reflow Snippets blocks - skip this paragraph
1476                i = paragraph_start + paragraph_lines.len();
1477                continue;
1478            }
1479
1480            // Check if this paragraph needs reflowing
1481            let needs_reflow = match config.reflow_mode {
1482                ReflowMode::Normalize => {
1483                    // In normalize mode, reflow multi-line paragraphs
1484                    paragraph_lines.len() > 1
1485                }
1486                ReflowMode::SentencePerLine => {
1487                    // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1488                    // Note: we check the joined text because sentences can span multiple lines
1489                    let sentences = split_into_sentences(&paragraph_text);
1490
1491                    // Always reflow if multiple sentences on one line
1492                    if sentences.len() > 1 {
1493                        true
1494                    } else if paragraph_lines.len() > 1 {
1495                        // For single-sentence paragraphs spanning multiple lines:
1496                        // Reflow if they COULD fit on one line (respecting line-length constraint)
1497                        if config.line_length.is_unlimited() {
1498                            // No line-length constraint - always join single sentences
1499                            true
1500                        } else {
1501                            // Only join if it fits within line-length
1502                            let effective_length = self.calculate_effective_length(&paragraph_text);
1503                            effective_length <= config.line_length.get()
1504                        }
1505                    } else {
1506                        false
1507                    }
1508                }
1509                ReflowMode::Default => {
1510                    // In default mode, only reflow if lines exceed limit
1511                    paragraph_lines
1512                        .iter()
1513                        .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1514                }
1515            };
1516
1517            if needs_reflow {
1518                // Calculate byte range for this paragraph
1519                // Use whole_line_range for each line and combine
1520                let start_range = line_index.whole_line_range(paragraph_start + 1);
1521                let end_line = paragraph_start + paragraph_lines.len() - 1;
1522
1523                // For the last line, we want to preserve any trailing newline
1524                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1525                    // Last line without trailing newline - use line_text_range
1526                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1527                } else {
1528                    // Not the last line or has trailing newline - use whole_line_range
1529                    line_index.whole_line_range(end_line + 1)
1530                };
1531
1532                let byte_range = start_range.start..end_range.end;
1533
1534                // Check if the paragraph ends with a hard break and what type
1535                let hard_break_type = paragraph_lines.last().and_then(|line| {
1536                    let line = line.strip_suffix('\r').unwrap_or(line);
1537                    if line.ends_with('\\') {
1538                        Some("\\")
1539                    } else if line.ends_with("  ") {
1540                        Some("  ")
1541                    } else {
1542                        None
1543                    }
1544                });
1545
1546                // Reflow the paragraph
1547                // When line_length = 0 (no limit), use a very large value for reflow
1548                let reflow_line_length = if config.line_length.is_unlimited() {
1549                    usize::MAX
1550                } else {
1551                    config.line_length.get()
1552                };
1553                let reflow_options = crate::utils::text_reflow::ReflowOptions {
1554                    line_length: reflow_line_length,
1555                    break_on_sentences: true,
1556                    preserve_breaks: false,
1557                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1558                    abbreviations: config.abbreviations_for_reflow(),
1559                };
1560                let mut reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
1561
1562                // If the original paragraph ended with a hard break, preserve it
1563                // Preserve the original hard break format (backslash or two spaces)
1564                if let Some(break_marker) = hard_break_type
1565                    && !reflowed.is_empty()
1566                {
1567                    let last_idx = reflowed.len() - 1;
1568                    if !has_hard_break(&reflowed[last_idx]) {
1569                        reflowed[last_idx].push_str(break_marker);
1570                    }
1571                }
1572
1573                let reflowed_text = reflowed.join("\n");
1574
1575                // Preserve trailing newline if the original paragraph had one
1576                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1577                    format!("{reflowed_text}\n")
1578                } else {
1579                    reflowed_text
1580                };
1581
1582                // Get the original text to compare
1583                let original_text = &ctx.content[byte_range.clone()];
1584
1585                // Only generate a warning if the replacement is different from the original
1586                if original_text != replacement {
1587                    // Create warning with actual fix
1588                    // In default mode, report the specific line that violates
1589                    // In normalize mode, report the whole paragraph
1590                    // In sentence-per-line mode, report the entire paragraph
1591                    let (warning_line, warning_end_line) = match config.reflow_mode {
1592                        ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1593                        ReflowMode::SentencePerLine => {
1594                            // Highlight the entire paragraph that needs reformatting
1595                            (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1596                        }
1597                        ReflowMode::Default => {
1598                            // Find the first line that exceeds the limit
1599                            let mut violating_line = paragraph_start;
1600                            for (idx, line) in paragraph_lines.iter().enumerate() {
1601                                if self.calculate_effective_length(line) > config.line_length.get() {
1602                                    violating_line = paragraph_start + idx;
1603                                    break;
1604                                }
1605                            }
1606                            (violating_line + 1, violating_line + 1)
1607                        }
1608                    };
1609
1610                    warnings.push(LintWarning {
1611                        rule_name: Some(self.name().to_string()),
1612                        message: match config.reflow_mode {
1613                            ReflowMode::Normalize => format!(
1614                                "Paragraph could be normalized to use line length of {} characters",
1615                                config.line_length.get()
1616                            ),
1617                            ReflowMode::SentencePerLine => {
1618                                let num_sentences = split_into_sentences(&paragraph_text).len();
1619                                if paragraph_lines.len() == 1 {
1620                                    // Single line with multiple sentences
1621                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1622                                } else {
1623                                    let num_lines = paragraph_lines.len();
1624                                    // Multiple lines - could be split sentences or mixed
1625                                    format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1626                                }
1627                            },
1628                            ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1629                        },
1630                        line: warning_line,
1631                        column: 1,
1632                        end_line: warning_end_line,
1633                        end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1634                        severity: Severity::Warning,
1635                        fix: Some(crate::rule::Fix {
1636                            range: byte_range,
1637                            replacement,
1638                        }),
1639                    });
1640                }
1641            }
1642        }
1643
1644        warnings
1645    }
1646
1647    /// Calculate string length based on the configured length mode
1648    fn calculate_string_length(&self, s: &str) -> usize {
1649        match self.config.length_mode {
1650            LengthMode::Chars => s.chars().count(),
1651            LengthMode::Visual => s.width(),
1652            LengthMode::Bytes => s.len(),
1653        }
1654    }
1655
1656    /// Calculate effective line length excluding unbreakable URLs
1657    fn calculate_effective_length(&self, line: &str) -> usize {
1658        if self.config.strict {
1659            // In strict mode, count everything
1660            return self.calculate_string_length(line);
1661        }
1662
1663        // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1664        let bytes = line.as_bytes();
1665        if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1666            return self.calculate_string_length(line);
1667        }
1668
1669        // More precise check for URLs and links
1670        if !line.contains("http") && !line.contains('[') {
1671            return self.calculate_string_length(line);
1672        }
1673
1674        let mut effective_line = line.to_string();
1675
1676        // First handle markdown links to avoid double-counting URLs
1677        // Pattern: [text](very-long-url) -> [text](url)
1678        if line.contains('[') && line.contains("](") {
1679            for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1680                if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1681                    && url.as_str().len() > 15
1682                {
1683                    let replacement = format!("[{}](url)", text.as_str());
1684                    effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1685                }
1686            }
1687        }
1688
1689        // Then replace bare URLs with a placeholder of reasonable length
1690        // This allows lines with long URLs to pass if the rest of the content is reasonable
1691        if effective_line.contains("http") {
1692            for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1693                let url = url_match.as_str();
1694                // Skip if this URL is already part of a markdown link we handled
1695                if !effective_line.contains(&format!("({url})")) {
1696                    // Replace URL with placeholder that represents a "reasonable" URL length
1697                    // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1698                    let placeholder = "x".repeat(15.min(url.len()));
1699                    effective_line = effective_line.replacen(url, &placeholder, 1);
1700                }
1701            }
1702        }
1703
1704        self.calculate_string_length(&effective_line)
1705    }
1706}