rumdl_lib/rules/md013_line_length/
mod.rs

1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::range_utils::LineIndex;
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9    IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18    extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19    split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
/// Lint rule MD013: reports lines that are longer than the configured limit.
///
/// The struct only carries the rule's configuration; the checking logic lives
/// in the `Rule` implementation and the inherent helper methods.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings this rule instance was built with. Inline configuration found
    /// in a document is layered on top of a clone of this value during `check`.
    pub(crate) config: MD013Config,
}
32
33impl MD013LineLength {
34    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35        Self {
36            config: MD013Config {
37                line_length: crate::types::LineLength::new(line_length),
38                code_blocks,
39                tables,
40                headings,
41                paragraphs: true, // Default to true for backwards compatibility
42                strict,
43                reflow: false,
44                reflow_mode: ReflowMode::default(),
45                length_mode: LengthMode::default(),
46                abbreviations: None,
47            },
48        }
49    }
50
    /// Wraps an already-built [`MD013Config`] in a rule instance, storing it unchanged.
    pub fn from_config_struct(config: MD013Config) -> Self {
        Self { config }
    }
54
55    fn should_ignore_line(
56        &self,
57        line: &str,
58        _lines: &[&str],
59        current_line: usize,
60        ctx: &crate::lint_context::LintContext,
61    ) -> bool {
62        if self.config.strict {
63            return false;
64        }
65
66        // Quick check for common patterns before expensive regex
67        let trimmed = line.trim();
68
69        // Only skip if the entire line is a URL (quick check first)
70        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71            return true;
72        }
73
74        // Only skip if the entire line is an image reference (quick check first)
75        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76            return true;
77        }
78
79        // Only skip if the entire line is a link reference (quick check first)
80        if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81            return true;
82        }
83
84        // Code blocks with long strings (only check if in code block)
85        if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86            && !trimmed.is_empty()
87            && !line.contains(' ')
88            && !line.contains('\t')
89        {
90            return true;
91        }
92
93        false
94    }
95
96    /// Check if rule should skip based on provided config (used for inline config support)
97    fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
98        // Skip if content is empty
99        if ctx.content.is_empty() {
100            return true;
101        }
102
103        // For sentence-per-line or normalize mode, never skip based on line length
104        if config.reflow
105            && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
106        {
107            return false;
108        }
109
110        // Quick check: if total content is shorter than line limit, definitely skip
111        if ctx.content.len() <= config.line_length.get() {
112            return true;
113        }
114
115        // Skip if no line exceeds the limit
116        !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
117    }
118}
119
120impl Rule for MD013LineLength {
    /// Returns the rule identifier, `"MD013"`.
    fn name(&self) -> &'static str {
        "MD013"
    }
124
    /// Returns the one-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Line length should not be excessive"
    }
128
129    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
130        let content = ctx.content;
131
132        // Parse inline configuration FIRST so we can use effective config for should_skip
133        let inline_config = crate::inline_config::InlineConfig::from_content(content);
134        let config_override = inline_config.get_rule_config("MD013");
135
136        // Apply configuration override if present
137        let effective_config = if let Some(json_config) = config_override {
138            if let Some(obj) = json_config.as_object() {
139                let mut config = self.config.clone();
140                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
141                    config.line_length = crate::types::LineLength::new(line_length as usize);
142                }
143                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
144                    config.code_blocks = code_blocks;
145                }
146                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
147                    config.tables = tables;
148                }
149                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
150                    config.headings = headings;
151                }
152                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
153                    config.strict = strict;
154                }
155                if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
156                    config.reflow = reflow;
157                }
158                if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
159                    config.reflow_mode = match reflow_mode {
160                        "default" => ReflowMode::Default,
161                        "normalize" => ReflowMode::Normalize,
162                        "sentence-per-line" => ReflowMode::SentencePerLine,
163                        _ => ReflowMode::default(),
164                    };
165                }
166                config
167            } else {
168                self.config.clone()
169            }
170        } else {
171            self.config.clone()
172        };
173
174        // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
175        // But don't skip if we're in reflow mode with Normalize or SentencePerLine
176        if self.should_skip_with_config(ctx, &effective_config)
177            && !(effective_config.reflow
178                && (effective_config.reflow_mode == ReflowMode::Normalize
179                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
180        {
181            return Ok(Vec::new());
182        }
183
184        // Direct implementation without DocumentStructure
185        let mut warnings = Vec::new();
186
187        // Special handling: line_length = 0 means "no line length limit"
188        // Skip all line length checks, but still allow reflow if enabled
189        let skip_length_checks = effective_config.line_length.is_unlimited();
190
191        // Pre-filter lines that could be problematic to avoid processing all lines
192        let mut candidate_lines = Vec::new();
193        if !skip_length_checks {
194            for (line_idx, line_info) in ctx.lines.iter().enumerate() {
195                // Skip front matter - it should never be linted
196                if line_info.in_front_matter {
197                    continue;
198                }
199
200                // Quick length check first
201                if line_info.byte_len > effective_config.line_length.get() {
202                    candidate_lines.push(line_idx);
203                }
204            }
205        }
206
207        // If no candidate lines and not in normalize or sentence-per-line mode, early return
208        if candidate_lines.is_empty()
209            && !(effective_config.reflow
210                && (effective_config.reflow_mode == ReflowMode::Normalize
211                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
212        {
213            return Ok(warnings);
214        }
215
216        // Use ctx.lines if available for better performance
217        let lines: Vec<&str> = if !ctx.lines.is_empty() {
218            ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
219        } else {
220            content.lines().collect()
221        };
222
223        // Create a quick lookup set for heading lines
224        // We need this for both the heading skip check AND the paragraphs check
225        let heading_lines_set: std::collections::HashSet<usize> = ctx
226            .lines
227            .iter()
228            .enumerate()
229            .filter(|(_, line)| line.heading.is_some())
230            .map(|(idx, _)| idx + 1)
231            .collect();
232
233        // Use pre-computed table blocks from context
234        // We need this for both the table skip check AND the paragraphs check
235        let table_blocks = &ctx.table_blocks;
236        let mut table_lines_set = std::collections::HashSet::new();
237        for table in table_blocks {
238            table_lines_set.insert(table.header_line + 1);
239            table_lines_set.insert(table.delimiter_line + 1);
240            for &line in &table.content_lines {
241                table_lines_set.insert(line + 1);
242            }
243        }
244
245        // Process candidate lines for line length checks
246        for &line_idx in &candidate_lines {
247            let line_number = line_idx + 1;
248            let line = lines[line_idx];
249
250            // Calculate effective length excluding unbreakable URLs
251            let effective_length = self.calculate_effective_length(line);
252
253            // Use single line length limit for all content
254            let line_limit = effective_config.line_length.get();
255
256            // Skip short lines immediately (double-check after effective length calculation)
257            if effective_length <= line_limit {
258                continue;
259            }
260
261            // Skip mkdocstrings blocks (already handled by LintContext)
262            if ctx.lines[line_idx].in_mkdocstrings {
263                continue;
264            }
265
266            // Skip various block types efficiently
267            if !effective_config.strict {
268                // Skip setext heading underlines
269                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
270                    continue;
271                }
272
273                // Skip block elements according to config flags
274                // The flags mean: true = check these elements, false = skip these elements
275                // So we skip when the flag is FALSE and the line is in that element type
276                if (!effective_config.headings && heading_lines_set.contains(&line_number))
277                    || (!effective_config.code_blocks
278                        && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
279                    || (!effective_config.tables && table_lines_set.contains(&line_number))
280                    || ctx.lines[line_number - 1].blockquote.is_some()
281                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
282                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
283                    || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
284                {
285                    continue;
286                }
287
288                // Check if this is a paragraph/regular text line
289                // If paragraphs = false, skip lines that are NOT in special blocks
290                if !effective_config.paragraphs {
291                    let is_special_block = heading_lines_set.contains(&line_number)
292                        || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
293                        || table_lines_set.contains(&line_number)
294                        || ctx.lines[line_number - 1].blockquote.is_some()
295                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
296                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
297                        || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block);
298
299                    // Skip regular paragraph text when paragraphs = false
300                    if !is_special_block {
301                        continue;
302                    }
303                }
304
305                // Skip lines that are only a URL, image ref, or link ref
306                if self.should_ignore_line(line, &lines, line_idx, ctx) {
307                    continue;
308                }
309            }
310
311            // In sentence-per-line mode, check if this is a single long sentence
312            // If so, emit a warning without a fix (user must manually rephrase)
313            if effective_config.reflow_mode == ReflowMode::SentencePerLine {
314                let sentences = split_into_sentences(line.trim());
315                if sentences.len() == 1 {
316                    // Single sentence that's too long - warn but don't auto-fix
317                    let message = format!("Line length {effective_length} exceeds {line_limit} characters");
318
319                    let (start_line, start_col, end_line, end_col) =
320                        calculate_excess_range(line_number, line, line_limit);
321
322                    warnings.push(LintWarning {
323                        rule_name: Some(self.name().to_string()),
324                        message,
325                        line: start_line,
326                        column: start_col,
327                        end_line,
328                        end_column: end_col,
329                        severity: Severity::Warning,
330                        fix: None, // No auto-fix for long single sentences
331                    });
332                    continue;
333                }
334                // Multiple sentences will be handled by paragraph-based reflow
335                continue;
336            }
337
338            // Don't provide fix for individual lines when reflow is enabled
339            // Paragraph-based fixes will be handled separately
340            let fix = None;
341
342            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
343
344            // Calculate precise character range for the excess portion
345            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
346
347            warnings.push(LintWarning {
348                rule_name: Some(self.name().to_string()),
349                message,
350                line: start_line,
351                column: start_col,
352                end_line,
353                end_column: end_col,
354                severity: Severity::Warning,
355                fix,
356            });
357        }
358
359        // If reflow is enabled, generate paragraph-based fixes
360        if effective_config.reflow {
361            let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
362            // Merge paragraph warnings with line warnings, removing duplicates
363            for pw in paragraph_warnings {
364                // Remove any line warnings that overlap with this paragraph
365                warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
366                warnings.push(pw);
367            }
368        }
369
370        Ok(warnings)
371    }
372
373    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
374        // For CLI usage, apply fixes from warnings
375        // LSP will use the warning-based fixes directly
376        let warnings = self.check(ctx)?;
377
378        // If there are no fixes, return content unchanged
379        if !warnings.iter().any(|w| w.fix.is_some()) {
380            return Ok(ctx.content.to_string());
381        }
382
383        // Apply warning-based fixes
384        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
385            .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
386    }
387
    /// Exposes the rule as `&dyn Any` so it can be downcast to its concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
391
    /// Classifies this rule under `RuleCategory::Whitespace`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
395
    /// Early-exit hook: delegates to `should_skip_with_config` using the rule's
    /// own configuration (inline overrides in the document are not consulted here).
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        self.should_skip_with_config(ctx, &self.config)
    }
399
400    fn default_config_section(&self) -> Option<(String, toml::Value)> {
401        let default_config = MD013Config::default();
402        let json_value = serde_json::to_value(&default_config).ok()?;
403        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
404
405        if let toml::Value::Table(table) = toml_value {
406            if !table.is_empty() {
407                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
408            } else {
409                None
410            }
411        } else {
412            None
413        }
414    }
415
416    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
417        let mut aliases = std::collections::HashMap::new();
418        aliases.insert("enable_reflow".to_string(), "reflow".to_string());
419        Some(aliases)
420    }
421
422    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
423    where
424        Self: Sized,
425    {
426        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
427        // Use global line_length if rule-specific config still has default value
428        if rule_config.line_length.get() == 80 {
429            rule_config.line_length = config.global.line_length;
430        }
431        Box::new(Self::from_config_struct(rule_config))
432    }
433}
434
435impl MD013LineLength {
436    /// Generate paragraph-based fixes
437    fn generate_paragraph_fixes(
438        &self,
439        ctx: &crate::lint_context::LintContext,
440        config: &MD013Config,
441        lines: &[&str],
442    ) -> Vec<LintWarning> {
443        let mut warnings = Vec::new();
444        let line_index = LineIndex::new(ctx.content);
445
446        let mut i = 0;
447        while i < lines.len() {
448            let line_num = i + 1;
449
450            // Skip special structures
451            let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
452                info.in_code_block
453                    || info.in_front_matter
454                    || info.in_html_block
455                    || info.in_html_comment
456                    || info.in_esm_block
457            });
458
459            if should_skip_due_to_line_info
460                || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
461                || lines[i].trim().starts_with('#')
462                || TableUtils::is_potential_table_row(lines[i])
463                || lines[i].trim().is_empty()
464                || is_horizontal_rule(lines[i].trim())
465                || is_template_directive_only(lines[i])
466            {
467                i += 1;
468                continue;
469            }
470
471            // Helper function to detect semantic line markers
472            let is_semantic_line = |content: &str| -> bool {
473                let trimmed = content.trim_start();
474                let semantic_markers = [
475                    "NOTE:",
476                    "WARNING:",
477                    "IMPORTANT:",
478                    "CAUTION:",
479                    "TIP:",
480                    "DANGER:",
481                    "HINT:",
482                    "INFO:",
483                ];
484                semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
485            };
486
487            // Helper function to detect fence markers (opening or closing)
488            let is_fence_marker = |content: &str| -> bool {
489                let trimmed = content.trim_start();
490                trimmed.starts_with("```") || trimmed.starts_with("~~~")
491            };
492
493            // Check if this is a list item - handle it specially
494            let trimmed = lines[i].trim();
495            if is_list_item(trimmed) {
496                // Collect the entire list item including continuation lines
497                let list_start = i;
498                let (marker, first_content) = extract_list_marker_and_content(lines[i]);
499                let marker_len = marker.len();
500
501                // Track lines and their types (content, code block, fence, nested list)
502                #[derive(Clone)]
503                enum LineType {
504                    Content(String),
505                    CodeBlock(String, usize),      // content and original indent
506                    NestedListItem(String, usize), // full line content and original indent
507                    SemanticLine(String),          // Lines starting with NOTE:, WARNING:, etc that should stay separate
508                    Empty,
509                }
510
511                let mut actual_indent: Option<usize> = None;
512                let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
513                i += 1;
514
515                // Collect continuation lines using ctx.lines for metadata
516                while i < lines.len() {
517                    let line_info = &ctx.lines[i];
518
519                    // Use pre-computed is_blank from ctx
520                    if line_info.is_blank {
521                        // Empty line - check if next line is indented (part of list item)
522                        if i + 1 < lines.len() {
523                            let next_info = &ctx.lines[i + 1];
524
525                            // Check if next line is indented enough to be continuation
526                            if !next_info.is_blank && next_info.indent >= marker_len {
527                                // This blank line is between paragraphs/blocks in the list item
528                                list_item_lines.push(LineType::Empty);
529                                i += 1;
530                                continue;
531                            }
532                        }
533                        // No indented line after blank, end of list item
534                        break;
535                    }
536
537                    // Use pre-computed indent from ctx
538                    let indent = line_info.indent;
539
540                    // Valid continuation must be indented at least marker_len
541                    if indent >= marker_len {
542                        let trimmed = line_info.content(ctx.content).trim();
543
544                        // Use pre-computed in_code_block from ctx
545                        if line_info.in_code_block {
546                            list_item_lines.push(LineType::CodeBlock(
547                                line_info.content(ctx.content)[indent..].to_string(),
548                                indent,
549                            ));
550                            i += 1;
551                            continue;
552                        }
553
554                        // Check if this is a SIBLING list item (breaks parent)
555                        // Nested lists are indented >= marker_len and are PART of the parent item
556                        // Siblings are at indent < marker_len (at or before parent marker)
557                        if is_list_item(trimmed) && indent < marker_len {
558                            // This is a sibling item at same or higher level - end parent item
559                            break;
560                        }
561
562                        // Check if this is a NESTED list item marker
563                        // Nested lists should be processed separately UNLESS they're part of a
564                        // multi-paragraph list item (indicated by a blank line before them OR
565                        // it's a continuation of an already-started nested list)
566                        if is_list_item(trimmed) && indent >= marker_len {
567                            // Check if there was a blank line before this (multi-paragraph context)
568                            let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
569
570                            // Check if we've already seen nested list content (another nested item)
571                            let has_nested_content = list_item_lines.iter().any(|line| {
572                                matches!(line, LineType::Content(c) if is_list_item(c.trim()))
573                                    || matches!(line, LineType::NestedListItem(_, _))
574                            });
575
576                            if !has_blank_before && !has_nested_content {
577                                // Single-paragraph context with no prior nested items: starts a new item
578                                // End parent collection; nested list will be processed next
579                                break;
580                            }
581                            // else: multi-paragraph context or continuation of nested list, keep collecting
582                            // Mark this as a nested list item to preserve its structure
583                            list_item_lines.push(LineType::NestedListItem(
584                                line_info.content(ctx.content)[indent..].to_string(),
585                                indent,
586                            ));
587                            i += 1;
588                            continue;
589                        }
590
591                        // Normal continuation: marker_len to marker_len+3
592                        if indent <= marker_len + 3 {
593                            // Set actual_indent from first non-code continuation if not set
594                            if actual_indent.is_none() {
595                                actual_indent = Some(indent);
596                            }
597
598                            // Extract content (remove indentation and trailing whitespace)
599                            // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
600                            // See: https://github.com/rvben/rumdl/issues/76
601                            let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
602
603                            // Check if this is a fence marker (opening or closing)
604                            // These should be treated as code block lines, not paragraph content
605                            if is_fence_marker(&content) {
606                                list_item_lines.push(LineType::CodeBlock(content, indent));
607                            }
608                            // Check if this is a semantic line (NOTE:, WARNING:, etc.)
609                            else if is_semantic_line(&content) {
610                                list_item_lines.push(LineType::SemanticLine(content));
611                            } else {
612                                list_item_lines.push(LineType::Content(content));
613                            }
614                            i += 1;
615                        } else {
616                            // indent >= marker_len + 4: indented code block
617                            list_item_lines.push(LineType::CodeBlock(
618                                line_info.content(ctx.content)[indent..].to_string(),
619                                indent,
620                            ));
621                            i += 1;
622                        }
623                    } else {
624                        // Not indented enough, end of list item
625                        break;
626                    }
627                }
628
629                // Use detected indent or fallback to marker length
630                let indent_size = actual_indent.unwrap_or(marker_len);
631                let expected_indent = " ".repeat(indent_size);
632
633                // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
634                #[derive(Clone)]
635                enum Block {
636                    Paragraph(Vec<String>),
637                    Code {
638                        lines: Vec<(String, usize)>, // (content, indent) pairs
639                        has_preceding_blank: bool,   // Whether there was a blank line before this block
640                    },
641                    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
642                    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
643                    Html {
644                        lines: Vec<String>,        // HTML content preserved exactly as-is
645                        has_preceding_blank: bool, // Whether there was a blank line before this block
646                    },
647                }
648
649                // HTML tag detection helpers
650                // Block-level HTML tags that should trigger HTML block detection
651                const BLOCK_LEVEL_TAGS: &[&str] = &[
652                    "div",
653                    "details",
654                    "summary",
655                    "section",
656                    "article",
657                    "header",
658                    "footer",
659                    "nav",
660                    "aside",
661                    "main",
662                    "table",
663                    "thead",
664                    "tbody",
665                    "tfoot",
666                    "tr",
667                    "td",
668                    "th",
669                    "ul",
670                    "ol",
671                    "li",
672                    "dl",
673                    "dt",
674                    "dd",
675                    "pre",
676                    "blockquote",
677                    "figure",
678                    "figcaption",
679                    "form",
680                    "fieldset",
681                    "legend",
682                    "hr",
683                    "p",
684                    "h1",
685                    "h2",
686                    "h3",
687                    "h4",
688                    "h5",
689                    "h6",
690                    "style",
691                    "script",
692                    "noscript",
693                ];
694
695                fn is_block_html_opening_tag(line: &str) -> Option<String> {
696                    let trimmed = line.trim();
697
698                    // Check for HTML comments
699                    if trimmed.starts_with("<!--") {
700                        return Some("!--".to_string());
701                    }
702
703                    // Check for opening tags
704                    if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
705                        // Extract tag name from <tagname ...> or <tagname>
706                        let after_bracket = &trimmed[1..];
707                        if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
708                            let tag_name = after_bracket[..end].to_lowercase();
709
710                            // Only treat as block if it's a known block-level tag
711                            if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
712                                return Some(tag_name);
713                            }
714                        }
715                    }
716                    None
717                }
718
719                fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
720                    let trimmed = line.trim();
721
722                    // Special handling for HTML comments
723                    if tag_name == "!--" {
724                        return trimmed.ends_with("-->");
725                    }
726
727                    // Check for closing tags: </tagname> or </tagname ...>
728                    trimmed.starts_with(&format!("</{tag_name}>"))
729                        || trimmed.starts_with(&format!("</{tag_name}  "))
730                        || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
731                }
732
733                fn is_self_closing_tag(line: &str) -> bool {
734                    let trimmed = line.trim();
735                    trimmed.ends_with("/>")
736                }
737
738                let mut blocks: Vec<Block> = Vec::new();
739                let mut current_paragraph: Vec<String> = Vec::new();
740                let mut current_code_block: Vec<(String, usize)> = Vec::new();
741                let mut current_nested_list: Vec<(String, usize)> = Vec::new();
742                let mut current_html_block: Vec<String> = Vec::new();
743                let mut html_tag_stack: Vec<String> = Vec::new();
744                let mut in_code = false;
745                let mut in_nested_list = false;
746                let mut in_html_block = false;
747                let mut had_preceding_blank = false; // Track if we just saw an empty line
748                let mut code_block_has_preceding_blank = false; // Track blank before current code block
749                let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
750
751                for line in &list_item_lines {
752                    match line {
753                        LineType::Empty => {
754                            if in_code {
755                                current_code_block.push((String::new(), 0));
756                            } else if in_nested_list {
757                                current_nested_list.push((String::new(), 0));
758                            } else if in_html_block {
759                                // Allow blank lines inside HTML blocks
760                                current_html_block.push(String::new());
761                            } else if !current_paragraph.is_empty() {
762                                blocks.push(Block::Paragraph(current_paragraph.clone()));
763                                current_paragraph.clear();
764                            }
765                            // Mark that we saw a blank line
766                            had_preceding_blank = true;
767                        }
768                        LineType::Content(content) => {
769                            // Check if we're currently in an HTML block
770                            if in_html_block {
771                                current_html_block.push(content.clone());
772
773                                // Check if this line closes any open HTML tags
774                                if let Some(last_tag) = html_tag_stack.last() {
775                                    if is_html_closing_tag(content, last_tag) {
776                                        html_tag_stack.pop();
777
778                                        // If stack is empty, HTML block is complete
779                                        if html_tag_stack.is_empty() {
780                                            blocks.push(Block::Html {
781                                                lines: current_html_block.clone(),
782                                                has_preceding_blank: html_block_has_preceding_blank,
783                                            });
784                                            current_html_block.clear();
785                                            in_html_block = false;
786                                        }
787                                    } else if let Some(new_tag) = is_block_html_opening_tag(content) {
788                                        // Nested opening tag within HTML block
789                                        if !is_self_closing_tag(content) {
790                                            html_tag_stack.push(new_tag);
791                                        }
792                                    }
793                                }
794                                had_preceding_blank = false;
795                            } else {
796                                // Not in HTML block - check if this line starts one
797                                if let Some(tag_name) = is_block_html_opening_tag(content) {
798                                    // Flush current paragraph before starting HTML block
799                                    if in_code {
800                                        blocks.push(Block::Code {
801                                            lines: current_code_block.clone(),
802                                            has_preceding_blank: code_block_has_preceding_blank,
803                                        });
804                                        current_code_block.clear();
805                                        in_code = false;
806                                    } else if in_nested_list {
807                                        blocks.push(Block::NestedList(current_nested_list.clone()));
808                                        current_nested_list.clear();
809                                        in_nested_list = false;
810                                    } else if !current_paragraph.is_empty() {
811                                        blocks.push(Block::Paragraph(current_paragraph.clone()));
812                                        current_paragraph.clear();
813                                    }
814
815                                    // Start new HTML block
816                                    in_html_block = true;
817                                    html_block_has_preceding_blank = had_preceding_blank;
818                                    current_html_block.push(content.clone());
819
820                                    // Check if it's self-closing or needs a closing tag
821                                    if is_self_closing_tag(content) {
822                                        // Self-closing tag - complete the HTML block immediately
823                                        blocks.push(Block::Html {
824                                            lines: current_html_block.clone(),
825                                            has_preceding_blank: html_block_has_preceding_blank,
826                                        });
827                                        current_html_block.clear();
828                                        in_html_block = false;
829                                    } else {
830                                        // Regular opening tag - push to stack
831                                        html_tag_stack.push(tag_name);
832                                    }
833                                } else {
834                                    // Regular content line - add to paragraph
835                                    if in_code {
836                                        // Switching from code to content
837                                        blocks.push(Block::Code {
838                                            lines: current_code_block.clone(),
839                                            has_preceding_blank: code_block_has_preceding_blank,
840                                        });
841                                        current_code_block.clear();
842                                        in_code = false;
843                                    } else if in_nested_list {
844                                        // Switching from nested list to content
845                                        blocks.push(Block::NestedList(current_nested_list.clone()));
846                                        current_nested_list.clear();
847                                        in_nested_list = false;
848                                    }
849                                    current_paragraph.push(content.clone());
850                                }
851                                had_preceding_blank = false; // Reset after content
852                            }
853                        }
854                        LineType::CodeBlock(content, indent) => {
855                            if in_nested_list {
856                                // Switching from nested list to code
857                                blocks.push(Block::NestedList(current_nested_list.clone()));
858                                current_nested_list.clear();
859                                in_nested_list = false;
860                            } else if in_html_block {
861                                // Switching from HTML block to code (shouldn't happen normally, but handle it)
862                                blocks.push(Block::Html {
863                                    lines: current_html_block.clone(),
864                                    has_preceding_blank: html_block_has_preceding_blank,
865                                });
866                                current_html_block.clear();
867                                html_tag_stack.clear();
868                                in_html_block = false;
869                            }
870                            if !in_code {
871                                // Switching from content to code
872                                if !current_paragraph.is_empty() {
873                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
874                                    current_paragraph.clear();
875                                }
876                                in_code = true;
877                                // Record whether there was a blank line before this code block
878                                code_block_has_preceding_blank = had_preceding_blank;
879                            }
880                            current_code_block.push((content.clone(), *indent));
881                            had_preceding_blank = false; // Reset after code
882                        }
883                        LineType::NestedListItem(content, indent) => {
884                            if in_code {
885                                // Switching from code to nested list
886                                blocks.push(Block::Code {
887                                    lines: current_code_block.clone(),
888                                    has_preceding_blank: code_block_has_preceding_blank,
889                                });
890                                current_code_block.clear();
891                                in_code = false;
892                            } else if in_html_block {
893                                // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
894                                blocks.push(Block::Html {
895                                    lines: current_html_block.clone(),
896                                    has_preceding_blank: html_block_has_preceding_blank,
897                                });
898                                current_html_block.clear();
899                                html_tag_stack.clear();
900                                in_html_block = false;
901                            }
902                            if !in_nested_list {
903                                // Switching from content to nested list
904                                if !current_paragraph.is_empty() {
905                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
906                                    current_paragraph.clear();
907                                }
908                                in_nested_list = true;
909                            }
910                            current_nested_list.push((content.clone(), *indent));
911                            had_preceding_blank = false; // Reset after nested list
912                        }
913                        LineType::SemanticLine(content) => {
914                            // Semantic lines are standalone - flush any current block and add as separate block
915                            if in_code {
916                                blocks.push(Block::Code {
917                                    lines: current_code_block.clone(),
918                                    has_preceding_blank: code_block_has_preceding_blank,
919                                });
920                                current_code_block.clear();
921                                in_code = false;
922                            } else if in_nested_list {
923                                blocks.push(Block::NestedList(current_nested_list.clone()));
924                                current_nested_list.clear();
925                                in_nested_list = false;
926                            } else if in_html_block {
927                                blocks.push(Block::Html {
928                                    lines: current_html_block.clone(),
929                                    has_preceding_blank: html_block_has_preceding_blank,
930                                });
931                                current_html_block.clear();
932                                html_tag_stack.clear();
933                                in_html_block = false;
934                            } else if !current_paragraph.is_empty() {
935                                blocks.push(Block::Paragraph(current_paragraph.clone()));
936                                current_paragraph.clear();
937                            }
938                            // Add semantic line as its own block
939                            blocks.push(Block::SemanticLine(content.clone()));
940                            had_preceding_blank = false; // Reset after semantic line
941                        }
942                    }
943                }
944
945                // Push remaining block
946                if in_code && !current_code_block.is_empty() {
947                    blocks.push(Block::Code {
948                        lines: current_code_block,
949                        has_preceding_blank: code_block_has_preceding_blank,
950                    });
951                } else if in_nested_list && !current_nested_list.is_empty() {
952                    blocks.push(Block::NestedList(current_nested_list));
953                } else if in_html_block && !current_html_block.is_empty() {
954                    // If we still have an unclosed HTML block, push it anyway
955                    // (malformed HTML - missing closing tag)
956                    blocks.push(Block::Html {
957                        lines: current_html_block,
958                        has_preceding_blank: html_block_has_preceding_blank,
959                    });
960                } else if !current_paragraph.is_empty() {
961                    blocks.push(Block::Paragraph(current_paragraph));
962                }
963
964                // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
965                let content_lines: Vec<String> = list_item_lines
966                    .iter()
967                    .filter_map(|line| {
968                        if let LineType::Content(s) = line {
969                            Some(s.clone())
970                        } else {
971                            None
972                        }
973                    })
974                    .collect();
975
976                // Check if we need to reflow this list item
977                // We check the combined content to see if it exceeds length limits
978                let combined_content = content_lines.join(" ").trim().to_string();
979                let full_line = format!("{marker}{combined_content}");
980
981                // Helper to check if we should reflow in normalize mode
982                let should_normalize = || {
983                    // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
984                    // DO normalize if it has plain text content that spans multiple lines
985                    let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
986                    let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
987                    let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
988                    let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
989
990                    // If we have nested lists, code blocks, or semantic lines but no paragraphs, don't normalize
991                    if (has_nested_lists || has_code_blocks || has_semantic_lines) && !has_paragraphs {
992                        return false;
993                    }
994
995                    // If we have paragraphs, check if they span multiple lines or there are multiple blocks
996                    if has_paragraphs {
997                        let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
998                        if paragraph_count > 1 {
999                            // Multiple paragraph blocks should be normalized
1000                            return true;
1001                        }
1002
1003                        // Single paragraph block: normalize if it has multiple content lines
1004                        if content_lines.len() > 1 {
1005                            return true;
1006                        }
1007                    }
1008
1009                    false
1010                };
1011
1012                let needs_reflow = match config.reflow_mode {
1013                    ReflowMode::Normalize => {
1014                        // Only reflow if:
1015                        // 1. The combined line would exceed the limit, OR
1016                        // 2. The list item should be normalized (has multi-line plain text)
1017                        let combined_length = self.calculate_effective_length(&full_line);
1018                        if combined_length > config.line_length.get() {
1019                            true
1020                        } else {
1021                            should_normalize()
1022                        }
1023                    }
1024                    ReflowMode::SentencePerLine => {
1025                        // Check if list item has multiple sentences
1026                        let sentences = split_into_sentences(&combined_content);
1027                        sentences.len() > 1
1028                    }
1029                    ReflowMode::Default => {
1030                        // In default mode, only reflow if any individual line exceeds limit
1031                        // Check the original lines, not the combined content
1032                        (list_start..i)
1033                            .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1034                    }
1035                };
1036
1037                if needs_reflow {
1038                    let start_range = line_index.whole_line_range(list_start + 1);
1039                    let end_line = i - 1;
1040                    let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1041                        line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1042                    } else {
1043                        line_index.whole_line_range(end_line + 1)
1044                    };
1045                    let byte_range = start_range.start..end_range.end;
1046
1047                    // Reflow each block (paragraphs only, preserve code blocks)
1048                    // When line_length = 0 (no limit), use a very large value for reflow
1049                    let reflow_line_length = if config.line_length.is_unlimited() {
1050                        usize::MAX
1051                    } else {
1052                        config.line_length.get().saturating_sub(indent_size).max(1)
1053                    };
1054                    let reflow_options = crate::utils::text_reflow::ReflowOptions {
1055                        line_length: reflow_line_length,
1056                        break_on_sentences: true,
1057                        preserve_breaks: false,
1058                        sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1059                        abbreviations: config.abbreviations.clone(),
1060                    };
1061
1062                    let mut result: Vec<String> = Vec::new();
1063                    let mut is_first_block = true;
1064
1065                    for (block_idx, block) in blocks.iter().enumerate() {
1066                        match block {
1067                            Block::Paragraph(para_lines) => {
1068                                // Split the paragraph into segments at hard break boundaries
1069                                // Each segment can be reflowed independently
1070                                let segments = split_into_segments(para_lines);
1071
1072                                for (segment_idx, segment) in segments.iter().enumerate() {
1073                                    // Check if this segment ends with a hard break and what type
1074                                    let hard_break_type = segment.last().and_then(|line| {
1075                                        let line = line.strip_suffix('\r').unwrap_or(line);
1076                                        if line.ends_with('\\') {
1077                                            Some("\\")
1078                                        } else if line.ends_with("  ") {
1079                                            Some("  ")
1080                                        } else {
1081                                            None
1082                                        }
1083                                    });
1084
1085                                    // Join and reflow the segment (removing the hard break marker for processing)
1086                                    let segment_for_reflow: Vec<String> = segment
1087                                        .iter()
1088                                        .map(|line| {
1089                                            // Strip hard break marker (2 spaces or backslash) for reflow processing
1090                                            if line.ends_with('\\') {
1091                                                line[..line.len() - 1].trim_end().to_string()
1092                                            } else if line.ends_with("  ") {
1093                                                line[..line.len() - 2].trim_end().to_string()
1094                                            } else {
1095                                                line.clone()
1096                                            }
1097                                        })
1098                                        .collect();
1099
1100                                    let segment_text = segment_for_reflow.join(" ").trim().to_string();
1101                                    if !segment_text.is_empty() {
1102                                        let reflowed =
1103                                            crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1104
1105                                        if is_first_block && segment_idx == 0 {
1106                                            // First segment of first block starts with marker
1107                                            result.push(format!("{marker}{}", reflowed[0]));
1108                                            for line in reflowed.iter().skip(1) {
1109                                                result.push(format!("{expected_indent}{line}"));
1110                                            }
1111                                            is_first_block = false;
1112                                        } else {
1113                                            // Subsequent segments
1114                                            for line in reflowed {
1115                                                result.push(format!("{expected_indent}{line}"));
1116                                            }
1117                                        }
1118
1119                                        // If this segment had a hard break, add it back to the last line
1120                                        // Preserve the original hard break format (backslash or two spaces)
1121                                        if let Some(break_marker) = hard_break_type
1122                                            && let Some(last_line) = result.last_mut()
1123                                        {
1124                                            last_line.push_str(break_marker);
1125                                        }
1126                                    }
1127                                }
1128
1129                                // Add blank line after paragraph block if there's a next block
1130                                // BUT: check if next block is a code block that doesn't want a preceding blank
1131                                if block_idx < blocks.len() - 1 {
1132                                    let next_block = &blocks[block_idx + 1];
1133                                    let should_add_blank = match next_block {
1134                                        Block::Code {
1135                                            has_preceding_blank, ..
1136                                        } => *has_preceding_blank,
1137                                        _ => true, // For all other blocks, add blank line
1138                                    };
1139                                    if should_add_blank {
1140                                        result.push(String::new());
1141                                    }
1142                                }
1143                            }
1144                            Block::Code {
1145                                lines: code_lines,
1146                                has_preceding_blank: _,
1147                            } => {
1148                                // Preserve code blocks as-is with original indentation
1149                                // NOTE: Blank line before code block is handled by the previous block
1150                                // (see paragraph block's logic above)
1151
1152                                for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1153                                    if is_first_block && idx == 0 {
1154                                        // First line of first block gets marker
1155                                        result.push(format!(
1156                                            "{marker}{}",
1157                                            " ".repeat(orig_indent - marker_len) + content
1158                                        ));
1159                                        is_first_block = false;
1160                                    } else if content.is_empty() {
1161                                        result.push(String::new());
1162                                    } else {
1163                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1164                                    }
1165                                }
1166                            }
1167                            Block::NestedList(nested_items) => {
1168                                // Preserve nested list items as-is with original indentation
1169                                if !is_first_block {
1170                                    result.push(String::new());
1171                                }
1172
1173                                for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1174                                    if is_first_block && idx == 0 {
1175                                        // First line of first block gets marker
1176                                        result.push(format!(
1177                                            "{marker}{}",
1178                                            " ".repeat(orig_indent - marker_len) + content
1179                                        ));
1180                                        is_first_block = false;
1181                                    } else if content.is_empty() {
1182                                        result.push(String::new());
1183                                    } else {
1184                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1185                                    }
1186                                }
1187
1188                                // Add blank line after nested list if there's a next block
1189                                // Check if next block is a code block that doesn't want a preceding blank
1190                                if block_idx < blocks.len() - 1 {
1191                                    let next_block = &blocks[block_idx + 1];
1192                                    let should_add_blank = match next_block {
1193                                        Block::Code {
1194                                            has_preceding_blank, ..
1195                                        } => *has_preceding_blank,
1196                                        _ => true, // For all other blocks, add blank line
1197                                    };
1198                                    if should_add_blank {
1199                                        result.push(String::new());
1200                                    }
1201                                }
1202                            }
1203                            Block::SemanticLine(content) => {
1204                                // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1205                                // Add blank line before if not first block
1206                                if !is_first_block {
1207                                    result.push(String::new());
1208                                }
1209
1210                                if is_first_block {
1211                                    // First block starts with marker
1212                                    result.push(format!("{marker}{content}"));
1213                                    is_first_block = false;
1214                                } else {
1215                                    // Subsequent blocks use expected indent
1216                                    result.push(format!("{expected_indent}{content}"));
1217                                }
1218
1219                                // Add blank line after semantic line if there's a next block
1220                                // Check if next block is a code block that doesn't want a preceding blank
1221                                if block_idx < blocks.len() - 1 {
1222                                    let next_block = &blocks[block_idx + 1];
1223                                    let should_add_blank = match next_block {
1224                                        Block::Code {
1225                                            has_preceding_blank, ..
1226                                        } => *has_preceding_blank,
1227                                        _ => true, // For all other blocks, add blank line
1228                                    };
1229                                    if should_add_blank {
1230                                        result.push(String::new());
1231                                    }
1232                                }
1233                            }
1234                            Block::Html {
1235                                lines: html_lines,
1236                                has_preceding_blank: _,
1237                            } => {
1238                                // Preserve HTML blocks exactly as-is with original indentation
1239                                // NOTE: Blank line before HTML block is handled by the previous block
1240
1241                                for (idx, line) in html_lines.iter().enumerate() {
1242                                    if is_first_block && idx == 0 {
1243                                        // First line of first block gets marker
1244                                        result.push(format!("{marker}{line}"));
1245                                        is_first_block = false;
1246                                    } else if line.is_empty() {
1247                                        // Preserve blank lines inside HTML blocks
1248                                        result.push(String::new());
1249                                    } else {
1250                                        // Preserve lines with their original content (already includes indentation)
1251                                        result.push(format!("{expected_indent}{line}"));
1252                                    }
1253                                }
1254
1255                                // Add blank line after HTML block if there's a next block
1256                                if block_idx < blocks.len() - 1 {
1257                                    let next_block = &blocks[block_idx + 1];
1258                                    let should_add_blank = match next_block {
1259                                        Block::Code {
1260                                            has_preceding_blank, ..
1261                                        } => *has_preceding_blank,
1262                                        Block::Html {
1263                                            has_preceding_blank, ..
1264                                        } => *has_preceding_blank,
1265                                        _ => true, // For all other blocks, add blank line
1266                                    };
1267                                    if should_add_blank {
1268                                        result.push(String::new());
1269                                    }
1270                                }
1271                            }
1272                        }
1273                    }
1274
1275                    let reflowed_text = result.join("\n");
1276
1277                    // Preserve trailing newline
1278                    let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1279                        format!("{reflowed_text}\n")
1280                    } else {
1281                        reflowed_text
1282                    };
1283
1284                    // Get the original text to compare
1285                    let original_text = &ctx.content[byte_range.clone()];
1286
1287                    // Only generate a warning if the replacement is different from the original
1288                    if original_text != replacement {
1289                        // Generate an appropriate message based on why reflow is needed
1290                        let message = match config.reflow_mode {
1291                            ReflowMode::SentencePerLine => {
1292                                let num_sentences = split_into_sentences(&combined_content).len();
1293                                let num_lines = content_lines.len();
1294                                if num_lines == 1 {
1295                                    // Single line with multiple sentences
1296                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1297                                } else {
1298                                    // Multiple lines - could be split sentences or mixed
1299                                    format!(
1300                                        "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1301                                    )
1302                                }
1303                            }
1304                            ReflowMode::Normalize => {
1305                                let combined_length = self.calculate_effective_length(&full_line);
1306                                if combined_length > config.line_length.get() {
1307                                    format!(
1308                                        "Line length {} exceeds {} characters",
1309                                        combined_length,
1310                                        config.line_length.get()
1311                                    )
1312                                } else {
1313                                    "Multi-line content can be normalized".to_string()
1314                                }
1315                            }
1316                            ReflowMode::Default => {
1317                                let combined_length = self.calculate_effective_length(&full_line);
1318                                format!(
1319                                    "Line length {} exceeds {} characters",
1320                                    combined_length,
1321                                    config.line_length.get()
1322                                )
1323                            }
1324                        };
1325
1326                        warnings.push(LintWarning {
1327                            rule_name: Some(self.name().to_string()),
1328                            message,
1329                            line: list_start + 1,
1330                            column: 1,
1331                            end_line: end_line + 1,
1332                            end_column: lines[end_line].len() + 1,
1333                            severity: Severity::Warning,
1334                            fix: Some(crate::rule::Fix {
1335                                range: byte_range,
1336                                replacement,
1337                            }),
1338                        });
1339                    }
1340                }
1341                continue;
1342            }
1343
1344            // Found start of a paragraph - collect all lines in it
1345            let paragraph_start = i;
1346            let mut paragraph_lines = vec![lines[i]];
1347            i += 1;
1348
1349            while i < lines.len() {
1350                let next_line = lines[i];
1351                let next_line_num = i + 1;
1352                let next_trimmed = next_line.trim();
1353
1354                // Stop at paragraph boundaries
1355                if next_trimmed.is_empty()
1356                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1357                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1358                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1359                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1360                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1361                    || (next_line_num > 0
1362                        && next_line_num <= ctx.lines.len()
1363                        && ctx.lines[next_line_num - 1].blockquote.is_some())
1364                    || next_trimmed.starts_with('#')
1365                    || TableUtils::is_potential_table_row(next_line)
1366                    || is_list_item(next_trimmed)
1367                    || is_horizontal_rule(next_trimmed)
1368                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1369                    || is_template_directive_only(next_line)
1370                {
1371                    break;
1372                }
1373
1374                // Check if the previous line ends with a hard break (2+ spaces or backslash)
1375                if i > 0 && has_hard_break(lines[i - 1]) {
1376                    // Don't include lines after hard breaks in the same paragraph
1377                    break;
1378                }
1379
1380                paragraph_lines.push(next_line);
1381                i += 1;
1382            }
1383
1384            // Combine paragraph lines into a single string for processing
1385            // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1386            let paragraph_text = paragraph_lines.join(" ");
1387
1388            // Skip reflowing if this paragraph contains definition list items
1389            // Definition lists are multi-line structures that should not be joined
1390            let contains_definition_list = paragraph_lines
1391                .iter()
1392                .any(|line| crate::utils::is_definition_list_item(line));
1393
1394            if contains_definition_list {
1395                // Don't reflow definition lists - skip this paragraph
1396                i = paragraph_start + paragraph_lines.len();
1397                continue;
1398            }
1399
1400            // Check if this paragraph needs reflowing
1401            let needs_reflow = match config.reflow_mode {
1402                ReflowMode::Normalize => {
1403                    // In normalize mode, reflow multi-line paragraphs
1404                    paragraph_lines.len() > 1
1405                }
1406                ReflowMode::SentencePerLine => {
1407                    // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1408                    // Note: we check the joined text because sentences can span multiple lines
1409                    let sentences = split_into_sentences(&paragraph_text);
1410
1411                    // Always reflow if multiple sentences on one line
1412                    if sentences.len() > 1 {
1413                        true
1414                    } else if paragraph_lines.len() > 1 {
1415                        // For single-sentence paragraphs spanning multiple lines:
1416                        // Reflow if they COULD fit on one line (respecting line-length constraint)
1417                        if config.line_length.is_unlimited() {
1418                            // No line-length constraint - always join single sentences
1419                            true
1420                        } else {
1421                            // Only join if it fits within line-length
1422                            let effective_length = self.calculate_effective_length(&paragraph_text);
1423                            effective_length <= config.line_length.get()
1424                        }
1425                    } else {
1426                        false
1427                    }
1428                }
1429                ReflowMode::Default => {
1430                    // In default mode, only reflow if lines exceed limit
1431                    paragraph_lines
1432                        .iter()
1433                        .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1434                }
1435            };
1436
1437            if needs_reflow {
1438                // Calculate byte range for this paragraph
1439                // Use whole_line_range for each line and combine
1440                let start_range = line_index.whole_line_range(paragraph_start + 1);
1441                let end_line = paragraph_start + paragraph_lines.len() - 1;
1442
1443                // For the last line, we want to preserve any trailing newline
1444                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1445                    // Last line without trailing newline - use line_text_range
1446                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1447                } else {
1448                    // Not the last line or has trailing newline - use whole_line_range
1449                    line_index.whole_line_range(end_line + 1)
1450                };
1451
1452                let byte_range = start_range.start..end_range.end;
1453
1454                // Check if the paragraph ends with a hard break and what type
1455                let hard_break_type = paragraph_lines.last().and_then(|line| {
1456                    let line = line.strip_suffix('\r').unwrap_or(line);
1457                    if line.ends_with('\\') {
1458                        Some("\\")
1459                    } else if line.ends_with("  ") {
1460                        Some("  ")
1461                    } else {
1462                        None
1463                    }
1464                });
1465
1466                // Reflow the paragraph
1467                // When line_length = 0 (no limit), use a very large value for reflow
1468                let reflow_line_length = if config.line_length.is_unlimited() {
1469                    usize::MAX
1470                } else {
1471                    config.line_length.get()
1472                };
1473                let reflow_options = crate::utils::text_reflow::ReflowOptions {
1474                    line_length: reflow_line_length,
1475                    break_on_sentences: true,
1476                    preserve_breaks: false,
1477                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1478                    abbreviations: config.abbreviations.clone(),
1479                };
1480                let mut reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
1481
1482                // If the original paragraph ended with a hard break, preserve it
1483                // Preserve the original hard break format (backslash or two spaces)
1484                if let Some(break_marker) = hard_break_type
1485                    && !reflowed.is_empty()
1486                {
1487                    let last_idx = reflowed.len() - 1;
1488                    if !has_hard_break(&reflowed[last_idx]) {
1489                        reflowed[last_idx].push_str(break_marker);
1490                    }
1491                }
1492
1493                let reflowed_text = reflowed.join("\n");
1494
1495                // Preserve trailing newline if the original paragraph had one
1496                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1497                    format!("{reflowed_text}\n")
1498                } else {
1499                    reflowed_text
1500                };
1501
1502                // Get the original text to compare
1503                let original_text = &ctx.content[byte_range.clone()];
1504
1505                // Only generate a warning if the replacement is different from the original
1506                if original_text != replacement {
1507                    // Create warning with actual fix
1508                    // In default mode, report the specific line that violates
1509                    // In normalize mode, report the whole paragraph
1510                    // In sentence-per-line mode, report the entire paragraph
1511                    let (warning_line, warning_end_line) = match config.reflow_mode {
1512                        ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1513                        ReflowMode::SentencePerLine => {
1514                            // Highlight the entire paragraph that needs reformatting
1515                            (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1516                        }
1517                        ReflowMode::Default => {
1518                            // Find the first line that exceeds the limit
1519                            let mut violating_line = paragraph_start;
1520                            for (idx, line) in paragraph_lines.iter().enumerate() {
1521                                if self.calculate_effective_length(line) > config.line_length.get() {
1522                                    violating_line = paragraph_start + idx;
1523                                    break;
1524                                }
1525                            }
1526                            (violating_line + 1, violating_line + 1)
1527                        }
1528                    };
1529
1530                    warnings.push(LintWarning {
1531                        rule_name: Some(self.name().to_string()),
1532                        message: match config.reflow_mode {
1533                            ReflowMode::Normalize => format!(
1534                                "Paragraph could be normalized to use line length of {} characters",
1535                                config.line_length.get()
1536                            ),
1537                            ReflowMode::SentencePerLine => {
1538                                let num_sentences = split_into_sentences(&paragraph_text).len();
1539                                if paragraph_lines.len() == 1 {
1540                                    // Single line with multiple sentences
1541                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1542                                } else {
1543                                    let num_lines = paragraph_lines.len();
1544                                    // Multiple lines - could be split sentences or mixed
1545                                    format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1546                                }
1547                            },
1548                            ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1549                        },
1550                        line: warning_line,
1551                        column: 1,
1552                        end_line: warning_end_line,
1553                        end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1554                        severity: Severity::Warning,
1555                        fix: Some(crate::rule::Fix {
1556                            range: byte_range,
1557                            replacement,
1558                        }),
1559                    });
1560                }
1561            }
1562        }
1563
1564        warnings
1565    }
1566
1567    /// Calculate string length based on the configured length mode
1568    fn calculate_string_length(&self, s: &str) -> usize {
1569        match self.config.length_mode {
1570            LengthMode::Chars => s.chars().count(),
1571            LengthMode::Visual => s.width(),
1572            LengthMode::Bytes => s.len(),
1573        }
1574    }
1575
1576    /// Calculate effective line length excluding unbreakable URLs
1577    fn calculate_effective_length(&self, line: &str) -> usize {
1578        if self.config.strict {
1579            // In strict mode, count everything
1580            return self.calculate_string_length(line);
1581        }
1582
1583        // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1584        let bytes = line.as_bytes();
1585        if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1586            return self.calculate_string_length(line);
1587        }
1588
1589        // More precise check for URLs and links
1590        if !line.contains("http") && !line.contains('[') {
1591            return self.calculate_string_length(line);
1592        }
1593
1594        let mut effective_line = line.to_string();
1595
1596        // First handle markdown links to avoid double-counting URLs
1597        // Pattern: [text](very-long-url) -> [text](url)
1598        if line.contains('[') && line.contains("](") {
1599            for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1600                if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1601                    && url.as_str().len() > 15
1602                {
1603                    let replacement = format!("[{}](url)", text.as_str());
1604                    effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1605                }
1606            }
1607        }
1608
1609        // Then replace bare URLs with a placeholder of reasonable length
1610        // This allows lines with long URLs to pass if the rest of the content is reasonable
1611        if effective_line.contains("http") {
1612            for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1613                let url = url_match.as_str();
1614                // Skip if this URL is already part of a markdown link we handled
1615                if !effective_line.contains(&format!("({url})")) {
1616                    // Replace URL with placeholder that represents a "reasonable" URL length
1617                    // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1618                    let placeholder = "x".repeat(15.min(url.len()));
1619                    effective_line = effective_line.replacen(url, &placeholder, 1);
1620                }
1621            }
1622        }
1623
1624        self.calculate_string_length(&effective_line)
1625    }
1626}