rumdl_lib/rules/md013_line_length/
mod.rs

1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::range_utils::LineIndex;
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9    IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18    extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19    split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
/// Lint rule MD013: reports lines whose length exceeds the configured limit.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings consulted by the rule (length limit, per-element toggles, strict mode,
    /// reflow options).
    pub(crate) config: MD013Config,
}
32
33impl MD013LineLength {
34    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35        Self {
36            config: MD013Config {
37                line_length: crate::types::LineLength::new(line_length),
38                code_blocks,
39                tables,
40                headings,
41                paragraphs: true, // Default to true for backwards compatibility
42                strict,
43                reflow: false,
44                reflow_mode: ReflowMode::default(),
45                length_mode: LengthMode::default(),
46                abbreviations: None,
47            },
48        }
49    }
50
    /// Wraps an already-built [`MD013Config`] in a rule instance without modifying it.
    pub fn from_config_struct(config: MD013Config) -> Self {
        Self { config }
    }
54
55    fn should_ignore_line(
56        &self,
57        line: &str,
58        _lines: &[&str],
59        current_line: usize,
60        ctx: &crate::lint_context::LintContext,
61    ) -> bool {
62        if self.config.strict {
63            return false;
64        }
65
66        // Quick check for common patterns before expensive regex
67        let trimmed = line.trim();
68
69        // Only skip if the entire line is a URL (quick check first)
70        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71            return true;
72        }
73
74        // Only skip if the entire line is an image reference (quick check first)
75        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76            return true;
77        }
78
79        // Only skip if the entire line is a link reference (quick check first)
80        if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81            return true;
82        }
83
84        // Code blocks with long strings (only check if in code block)
85        if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86            && !trimmed.is_empty()
87            && !line.contains(' ')
88            && !line.contains('\t')
89        {
90            return true;
91        }
92
93        false
94    }
95}
96
97impl Rule for MD013LineLength {
    /// Returns the rule identifier, `"MD013"`.
    fn name(&self) -> &'static str {
        "MD013"
    }
101
    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Line length should not be excessive"
    }
105
106    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
107        let content = ctx.content;
108
109        // Fast early return using should_skip
110        // But don't skip if we're in reflow mode with Normalize or SentencePerLine
111        if self.should_skip(ctx)
112            && !(self.config.reflow
113                && (self.config.reflow_mode == ReflowMode::Normalize
114                    || self.config.reflow_mode == ReflowMode::SentencePerLine))
115        {
116            return Ok(Vec::new());
117        }
118
119        // Direct implementation without DocumentStructure
120        let mut warnings = Vec::new();
121
122        // Check for inline configuration overrides
123        let inline_config = crate::inline_config::InlineConfig::from_content(content);
124        let config_override = inline_config.get_rule_config("MD013");
125
126        // Apply configuration override if present
127        let effective_config = if let Some(json_config) = config_override {
128            if let Some(obj) = json_config.as_object() {
129                let mut config = self.config.clone();
130                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
131                    config.line_length = crate::types::LineLength::new(line_length as usize);
132                }
133                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
134                    config.code_blocks = code_blocks;
135                }
136                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
137                    config.tables = tables;
138                }
139                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
140                    config.headings = headings;
141                }
142                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
143                    config.strict = strict;
144                }
145                if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
146                    config.reflow = reflow;
147                }
148                if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
149                    config.reflow_mode = match reflow_mode {
150                        "default" => ReflowMode::Default,
151                        "normalize" => ReflowMode::Normalize,
152                        "sentence-per-line" => ReflowMode::SentencePerLine,
153                        _ => ReflowMode::default(),
154                    };
155                }
156                config
157            } else {
158                self.config.clone()
159            }
160        } else {
161            self.config.clone()
162        };
163
164        // Special handling: line_length = 0 means "no line length limit"
165        // Skip all line length checks, but still allow reflow if enabled
166        let skip_length_checks = effective_config.line_length.is_unlimited();
167
168        // Pre-filter lines that could be problematic to avoid processing all lines
169        let mut candidate_lines = Vec::new();
170        if !skip_length_checks {
171            for (line_idx, line_info) in ctx.lines.iter().enumerate() {
172                // Skip front matter - it should never be linted
173                if line_info.in_front_matter {
174                    continue;
175                }
176
177                // Quick length check first
178                if line_info.byte_len > effective_config.line_length.get() {
179                    candidate_lines.push(line_idx);
180                }
181            }
182        }
183
184        // If no candidate lines and not in normalize or sentence-per-line mode, early return
185        if candidate_lines.is_empty()
186            && !(effective_config.reflow
187                && (effective_config.reflow_mode == ReflowMode::Normalize
188                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
189        {
190            return Ok(warnings);
191        }
192
193        // Use ctx.lines if available for better performance
194        let lines: Vec<&str> = if !ctx.lines.is_empty() {
195            ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
196        } else {
197            content.lines().collect()
198        };
199
200        // Create a quick lookup set for heading lines
201        // We need this for both the heading skip check AND the paragraphs check
202        let heading_lines_set: std::collections::HashSet<usize> = ctx
203            .lines
204            .iter()
205            .enumerate()
206            .filter(|(_, line)| line.heading.is_some())
207            .map(|(idx, _)| idx + 1)
208            .collect();
209
210        // Use pre-computed table blocks from context
211        // We need this for both the table skip check AND the paragraphs check
212        let table_blocks = &ctx.table_blocks;
213        let mut table_lines_set = std::collections::HashSet::new();
214        for table in table_blocks {
215            table_lines_set.insert(table.header_line + 1);
216            table_lines_set.insert(table.delimiter_line + 1);
217            for &line in &table.content_lines {
218                table_lines_set.insert(line + 1);
219            }
220        }
221
222        // Process candidate lines for line length checks
223        for &line_idx in &candidate_lines {
224            let line_number = line_idx + 1;
225            let line = lines[line_idx];
226
227            // Calculate effective length excluding unbreakable URLs
228            let effective_length = self.calculate_effective_length(line);
229
230            // Use single line length limit for all content
231            let line_limit = effective_config.line_length.get();
232
233            // Skip short lines immediately (double-check after effective length calculation)
234            if effective_length <= line_limit {
235                continue;
236            }
237
238            // Skip mkdocstrings blocks (already handled by LintContext)
239            if ctx.lines[line_idx].in_mkdocstrings {
240                continue;
241            }
242
243            // Skip various block types efficiently
244            if !effective_config.strict {
245                // Skip setext heading underlines
246                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
247                    continue;
248                }
249
250                // Skip block elements according to config flags
251                // The flags mean: true = check these elements, false = skip these elements
252                // So we skip when the flag is FALSE and the line is in that element type
253                if (!effective_config.headings && heading_lines_set.contains(&line_number))
254                    || (!effective_config.code_blocks
255                        && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
256                    || (!effective_config.tables && table_lines_set.contains(&line_number))
257                    || ctx.lines[line_number - 1].blockquote.is_some()
258                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
259                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
260                    || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
261                {
262                    continue;
263                }
264
265                // Check if this is a paragraph/regular text line
266                // If paragraphs = false, skip lines that are NOT in special blocks
267                if !effective_config.paragraphs {
268                    let is_special_block = heading_lines_set.contains(&line_number)
269                        || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
270                        || table_lines_set.contains(&line_number)
271                        || ctx.lines[line_number - 1].blockquote.is_some()
272                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
273                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
274                        || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block);
275
276                    // Skip regular paragraph text when paragraphs = false
277                    if !is_special_block {
278                        continue;
279                    }
280                }
281
282                // Skip lines that are only a URL, image ref, or link ref
283                if self.should_ignore_line(line, &lines, line_idx, ctx) {
284                    continue;
285                }
286            }
287
288            // In sentence-per-line mode, check if this is a single long sentence
289            // If so, emit a warning without a fix (user must manually rephrase)
290            if effective_config.reflow_mode == ReflowMode::SentencePerLine {
291                let sentences = split_into_sentences(line.trim());
292                if sentences.len() == 1 {
293                    // Single sentence that's too long - warn but don't auto-fix
294                    let message = format!("Line length {effective_length} exceeds {line_limit} characters");
295
296                    let (start_line, start_col, end_line, end_col) =
297                        calculate_excess_range(line_number, line, line_limit);
298
299                    warnings.push(LintWarning {
300                        rule_name: Some(self.name().to_string()),
301                        message,
302                        line: start_line,
303                        column: start_col,
304                        end_line,
305                        end_column: end_col,
306                        severity: Severity::Warning,
307                        fix: None, // No auto-fix for long single sentences
308                    });
309                    continue;
310                }
311                // Multiple sentences will be handled by paragraph-based reflow
312                continue;
313            }
314
315            // Don't provide fix for individual lines when reflow is enabled
316            // Paragraph-based fixes will be handled separately
317            let fix = None;
318
319            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
320
321            // Calculate precise character range for the excess portion
322            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
323
324            warnings.push(LintWarning {
325                rule_name: Some(self.name().to_string()),
326                message,
327                line: start_line,
328                column: start_col,
329                end_line,
330                end_column: end_col,
331                severity: Severity::Warning,
332                fix,
333            });
334        }
335
336        // If reflow is enabled, generate paragraph-based fixes
337        if effective_config.reflow {
338            let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
339            // Merge paragraph warnings with line warnings, removing duplicates
340            for pw in paragraph_warnings {
341                // Remove any line warnings that overlap with this paragraph
342                warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
343                warnings.push(pw);
344            }
345        }
346
347        Ok(warnings)
348    }
349
350    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
351        // For CLI usage, apply fixes from warnings
352        // LSP will use the warning-based fixes directly
353        let warnings = self.check(ctx)?;
354
355        // If there are no fixes, return content unchanged
356        if !warnings.iter().any(|w| w.fix.is_some()) {
357            return Ok(ctx.content.to_string());
358        }
359
360        // Apply warning-based fixes
361        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
362            .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
363    }
364
    /// Returns `self` as a `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
368
    /// Reports this rule under `RuleCategory::Whitespace`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
372
373    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
374        // Skip if content is empty
375        if ctx.content.is_empty() {
376            return true;
377        }
378
379        // For sentence-per-line or normalize mode, never skip based on line length
380        if self.config.reflow
381            && (self.config.reflow_mode == ReflowMode::SentencePerLine
382                || self.config.reflow_mode == ReflowMode::Normalize)
383        {
384            return false;
385        }
386
387        // Quick check: if total content is shorter than line limit, definitely skip
388        if ctx.content.len() <= self.config.line_length.get() {
389            return true;
390        }
391
392        // Use more efficient check - any() with early termination instead of all()
393        !ctx.lines
394            .iter()
395            .any(|line| line.byte_len > self.config.line_length.get())
396    }
397
398    fn default_config_section(&self) -> Option<(String, toml::Value)> {
399        let default_config = MD013Config::default();
400        let json_value = serde_json::to_value(&default_config).ok()?;
401        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
402
403        if let toml::Value::Table(table) = toml_value {
404            if !table.is_empty() {
405                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
406            } else {
407                None
408            }
409        } else {
410            None
411        }
412    }
413
414    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
415        let mut aliases = std::collections::HashMap::new();
416        aliases.insert("enable_reflow".to_string(), "reflow".to_string());
417        Some(aliases)
418    }
419
420    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
421    where
422        Self: Sized,
423    {
424        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
425        // Use global line_length if rule-specific config still has default value
426        if rule_config.line_length.get() == 80 {
427            rule_config.line_length = config.global.line_length;
428        }
429        Box::new(Self::from_config_struct(rule_config))
430    }
431}
432
433impl MD013LineLength {
434    /// Generate paragraph-based fixes
435    fn generate_paragraph_fixes(
436        &self,
437        ctx: &crate::lint_context::LintContext,
438        config: &MD013Config,
439        lines: &[&str],
440    ) -> Vec<LintWarning> {
441        let mut warnings = Vec::new();
442        let line_index = LineIndex::new(ctx.content);
443
444        let mut i = 0;
445        while i < lines.len() {
446            let line_num = i + 1;
447
448            // Skip special structures
449            let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
450                info.in_code_block
451                    || info.in_front_matter
452                    || info.in_html_block
453                    || info.in_html_comment
454                    || info.in_esm_block
455            });
456
457            if should_skip_due_to_line_info
458                || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
459                || lines[i].trim().starts_with('#')
460                || TableUtils::is_potential_table_row(lines[i])
461                || lines[i].trim().is_empty()
462                || is_horizontal_rule(lines[i].trim())
463                || is_template_directive_only(lines[i])
464            {
465                i += 1;
466                continue;
467            }
468
469            // Helper function to detect semantic line markers
470            let is_semantic_line = |content: &str| -> bool {
471                let trimmed = content.trim_start();
472                let semantic_markers = [
473                    "NOTE:",
474                    "WARNING:",
475                    "IMPORTANT:",
476                    "CAUTION:",
477                    "TIP:",
478                    "DANGER:",
479                    "HINT:",
480                    "INFO:",
481                ];
482                semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
483            };
484
485            // Helper function to detect fence markers (opening or closing)
486            let is_fence_marker = |content: &str| -> bool {
487                let trimmed = content.trim_start();
488                trimmed.starts_with("```") || trimmed.starts_with("~~~")
489            };
490
491            // Check if this is a list item - handle it specially
492            let trimmed = lines[i].trim();
493            if is_list_item(trimmed) {
494                // Collect the entire list item including continuation lines
495                let list_start = i;
496                let (marker, first_content) = extract_list_marker_and_content(lines[i]);
497                let marker_len = marker.len();
498
499                // Track lines and their types (content, code block, fence, nested list)
500                #[derive(Clone)]
501                enum LineType {
502                    Content(String),
503                    CodeBlock(String, usize),      // content and original indent
504                    NestedListItem(String, usize), // full line content and original indent
505                    SemanticLine(String),          // Lines starting with NOTE:, WARNING:, etc that should stay separate
506                    Empty,
507                }
508
509                let mut actual_indent: Option<usize> = None;
510                let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
511                i += 1;
512
513                // Collect continuation lines using ctx.lines for metadata
514                while i < lines.len() {
515                    let line_info = &ctx.lines[i];
516
517                    // Use pre-computed is_blank from ctx
518                    if line_info.is_blank {
519                        // Empty line - check if next line is indented (part of list item)
520                        if i + 1 < lines.len() {
521                            let next_info = &ctx.lines[i + 1];
522
523                            // Check if next line is indented enough to be continuation
524                            if !next_info.is_blank && next_info.indent >= marker_len {
525                                // This blank line is between paragraphs/blocks in the list item
526                                list_item_lines.push(LineType::Empty);
527                                i += 1;
528                                continue;
529                            }
530                        }
531                        // No indented line after blank, end of list item
532                        break;
533                    }
534
535                    // Use pre-computed indent from ctx
536                    let indent = line_info.indent;
537
538                    // Valid continuation must be indented at least marker_len
539                    if indent >= marker_len {
540                        let trimmed = line_info.content(ctx.content).trim();
541
542                        // Use pre-computed in_code_block from ctx
543                        if line_info.in_code_block {
544                            list_item_lines.push(LineType::CodeBlock(
545                                line_info.content(ctx.content)[indent..].to_string(),
546                                indent,
547                            ));
548                            i += 1;
549                            continue;
550                        }
551
552                        // Check if this is a SIBLING list item (breaks parent)
553                        // Nested lists are indented >= marker_len and are PART of the parent item
554                        // Siblings are at indent < marker_len (at or before parent marker)
555                        if is_list_item(trimmed) && indent < marker_len {
556                            // This is a sibling item at same or higher level - end parent item
557                            break;
558                        }
559
560                        // Check if this is a NESTED list item marker
561                        // Nested lists should be processed separately UNLESS they're part of a
562                        // multi-paragraph list item (indicated by a blank line before them OR
563                        // it's a continuation of an already-started nested list)
564                        if is_list_item(trimmed) && indent >= marker_len {
565                            // Check if there was a blank line before this (multi-paragraph context)
566                            let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
567
568                            // Check if we've already seen nested list content (another nested item)
569                            let has_nested_content = list_item_lines.iter().any(|line| {
570                                matches!(line, LineType::Content(c) if is_list_item(c.trim()))
571                                    || matches!(line, LineType::NestedListItem(_, _))
572                            });
573
574                            if !has_blank_before && !has_nested_content {
575                                // Single-paragraph context with no prior nested items: starts a new item
576                                // End parent collection; nested list will be processed next
577                                break;
578                            }
579                            // else: multi-paragraph context or continuation of nested list, keep collecting
580                            // Mark this as a nested list item to preserve its structure
581                            list_item_lines.push(LineType::NestedListItem(
582                                line_info.content(ctx.content)[indent..].to_string(),
583                                indent,
584                            ));
585                            i += 1;
586                            continue;
587                        }
588
589                        // Normal continuation: marker_len to marker_len+3
590                        if indent <= marker_len + 3 {
591                            // Set actual_indent from first non-code continuation if not set
592                            if actual_indent.is_none() {
593                                actual_indent = Some(indent);
594                            }
595
596                            // Extract content (remove indentation and trailing whitespace)
597                            // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
598                            // See: https://github.com/rvben/rumdl/issues/76
599                            let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
600
601                            // Check if this is a fence marker (opening or closing)
602                            // These should be treated as code block lines, not paragraph content
603                            if is_fence_marker(&content) {
604                                list_item_lines.push(LineType::CodeBlock(content, indent));
605                            }
606                            // Check if this is a semantic line (NOTE:, WARNING:, etc.)
607                            else if is_semantic_line(&content) {
608                                list_item_lines.push(LineType::SemanticLine(content));
609                            } else {
610                                list_item_lines.push(LineType::Content(content));
611                            }
612                            i += 1;
613                        } else {
614                            // indent >= marker_len + 4: indented code block
615                            list_item_lines.push(LineType::CodeBlock(
616                                line_info.content(ctx.content)[indent..].to_string(),
617                                indent,
618                            ));
619                            i += 1;
620                        }
621                    } else {
622                        // Not indented enough, end of list item
623                        break;
624                    }
625                }
626
627                // Use detected indent or fallback to marker length
628                let indent_size = actual_indent.unwrap_or(marker_len);
629                let expected_indent = " ".repeat(indent_size);
630
631                // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
632                #[derive(Clone)]
633                enum Block {
634                    Paragraph(Vec<String>),
635                    Code {
636                        lines: Vec<(String, usize)>, // (content, indent) pairs
637                        has_preceding_blank: bool,   // Whether there was a blank line before this block
638                    },
639                    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
640                    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
641                    Html {
642                        lines: Vec<String>,        // HTML content preserved exactly as-is
643                        has_preceding_blank: bool, // Whether there was a blank line before this block
644                    },
645                }
646
647                // HTML tag detection helpers
648                // Block-level HTML tags that should trigger HTML block detection
649                const BLOCK_LEVEL_TAGS: &[&str] = &[
650                    "div",
651                    "details",
652                    "summary",
653                    "section",
654                    "article",
655                    "header",
656                    "footer",
657                    "nav",
658                    "aside",
659                    "main",
660                    "table",
661                    "thead",
662                    "tbody",
663                    "tfoot",
664                    "tr",
665                    "td",
666                    "th",
667                    "ul",
668                    "ol",
669                    "li",
670                    "dl",
671                    "dt",
672                    "dd",
673                    "pre",
674                    "blockquote",
675                    "figure",
676                    "figcaption",
677                    "form",
678                    "fieldset",
679                    "legend",
680                    "hr",
681                    "p",
682                    "h1",
683                    "h2",
684                    "h3",
685                    "h4",
686                    "h5",
687                    "h6",
688                    "style",
689                    "script",
690                    "noscript",
691                ];
692
693                fn is_block_html_opening_tag(line: &str) -> Option<String> {
694                    let trimmed = line.trim();
695
696                    // Check for HTML comments
697                    if trimmed.starts_with("<!--") {
698                        return Some("!--".to_string());
699                    }
700
701                    // Check for opening tags
702                    if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
703                        // Extract tag name from <tagname ...> or <tagname>
704                        let after_bracket = &trimmed[1..];
705                        if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
706                            let tag_name = after_bracket[..end].to_lowercase();
707
708                            // Only treat as block if it's a known block-level tag
709                            if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
710                                return Some(tag_name);
711                            }
712                        }
713                    }
714                    None
715                }
716
717                fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
718                    let trimmed = line.trim();
719
720                    // Special handling for HTML comments
721                    if tag_name == "!--" {
722                        return trimmed.ends_with("-->");
723                    }
724
725                    // Check for closing tags: </tagname> or </tagname ...>
726                    trimmed.starts_with(&format!("</{tag_name}>"))
727                        || trimmed.starts_with(&format!("</{tag_name}  "))
728                        || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
729                }
730
731                fn is_self_closing_tag(line: &str) -> bool {
732                    let trimmed = line.trim();
733                    trimmed.ends_with("/>")
734                }
735
736                let mut blocks: Vec<Block> = Vec::new();
737                let mut current_paragraph: Vec<String> = Vec::new();
738                let mut current_code_block: Vec<(String, usize)> = Vec::new();
739                let mut current_nested_list: Vec<(String, usize)> = Vec::new();
740                let mut current_html_block: Vec<String> = Vec::new();
741                let mut html_tag_stack: Vec<String> = Vec::new();
742                let mut in_code = false;
743                let mut in_nested_list = false;
744                let mut in_html_block = false;
745                let mut had_preceding_blank = false; // Track if we just saw an empty line
746                let mut code_block_has_preceding_blank = false; // Track blank before current code block
747                let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
748
749                for line in &list_item_lines {
750                    match line {
751                        LineType::Empty => {
752                            if in_code {
753                                current_code_block.push((String::new(), 0));
754                            } else if in_nested_list {
755                                current_nested_list.push((String::new(), 0));
756                            } else if in_html_block {
757                                // Allow blank lines inside HTML blocks
758                                current_html_block.push(String::new());
759                            } else if !current_paragraph.is_empty() {
760                                blocks.push(Block::Paragraph(current_paragraph.clone()));
761                                current_paragraph.clear();
762                            }
763                            // Mark that we saw a blank line
764                            had_preceding_blank = true;
765                        }
766                        LineType::Content(content) => {
767                            // Check if we're currently in an HTML block
768                            if in_html_block {
769                                current_html_block.push(content.clone());
770
771                                // Check if this line closes any open HTML tags
772                                if let Some(last_tag) = html_tag_stack.last() {
773                                    if is_html_closing_tag(content, last_tag) {
774                                        html_tag_stack.pop();
775
776                                        // If stack is empty, HTML block is complete
777                                        if html_tag_stack.is_empty() {
778                                            blocks.push(Block::Html {
779                                                lines: current_html_block.clone(),
780                                                has_preceding_blank: html_block_has_preceding_blank,
781                                            });
782                                            current_html_block.clear();
783                                            in_html_block = false;
784                                        }
785                                    } else if let Some(new_tag) = is_block_html_opening_tag(content) {
786                                        // Nested opening tag within HTML block
787                                        if !is_self_closing_tag(content) {
788                                            html_tag_stack.push(new_tag);
789                                        }
790                                    }
791                                }
792                                had_preceding_blank = false;
793                            } else {
794                                // Not in HTML block - check if this line starts one
795                                if let Some(tag_name) = is_block_html_opening_tag(content) {
796                                    // Flush current paragraph before starting HTML block
797                                    if in_code {
798                                        blocks.push(Block::Code {
799                                            lines: current_code_block.clone(),
800                                            has_preceding_blank: code_block_has_preceding_blank,
801                                        });
802                                        current_code_block.clear();
803                                        in_code = false;
804                                    } else if in_nested_list {
805                                        blocks.push(Block::NestedList(current_nested_list.clone()));
806                                        current_nested_list.clear();
807                                        in_nested_list = false;
808                                    } else if !current_paragraph.is_empty() {
809                                        blocks.push(Block::Paragraph(current_paragraph.clone()));
810                                        current_paragraph.clear();
811                                    }
812
813                                    // Start new HTML block
814                                    in_html_block = true;
815                                    html_block_has_preceding_blank = had_preceding_blank;
816                                    current_html_block.push(content.clone());
817
818                                    // Check if it's self-closing or needs a closing tag
819                                    if is_self_closing_tag(content) {
820                                        // Self-closing tag - complete the HTML block immediately
821                                        blocks.push(Block::Html {
822                                            lines: current_html_block.clone(),
823                                            has_preceding_blank: html_block_has_preceding_blank,
824                                        });
825                                        current_html_block.clear();
826                                        in_html_block = false;
827                                    } else {
828                                        // Regular opening tag - push to stack
829                                        html_tag_stack.push(tag_name);
830                                    }
831                                } else {
832                                    // Regular content line - add to paragraph
833                                    if in_code {
834                                        // Switching from code to content
835                                        blocks.push(Block::Code {
836                                            lines: current_code_block.clone(),
837                                            has_preceding_blank: code_block_has_preceding_blank,
838                                        });
839                                        current_code_block.clear();
840                                        in_code = false;
841                                    } else if in_nested_list {
842                                        // Switching from nested list to content
843                                        blocks.push(Block::NestedList(current_nested_list.clone()));
844                                        current_nested_list.clear();
845                                        in_nested_list = false;
846                                    }
847                                    current_paragraph.push(content.clone());
848                                }
849                                had_preceding_blank = false; // Reset after content
850                            }
851                        }
852                        LineType::CodeBlock(content, indent) => {
853                            if in_nested_list {
854                                // Switching from nested list to code
855                                blocks.push(Block::NestedList(current_nested_list.clone()));
856                                current_nested_list.clear();
857                                in_nested_list = false;
858                            } else if in_html_block {
859                                // Switching from HTML block to code (shouldn't happen normally, but handle it)
860                                blocks.push(Block::Html {
861                                    lines: current_html_block.clone(),
862                                    has_preceding_blank: html_block_has_preceding_blank,
863                                });
864                                current_html_block.clear();
865                                html_tag_stack.clear();
866                                in_html_block = false;
867                            }
868                            if !in_code {
869                                // Switching from content to code
870                                if !current_paragraph.is_empty() {
871                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
872                                    current_paragraph.clear();
873                                }
874                                in_code = true;
875                                // Record whether there was a blank line before this code block
876                                code_block_has_preceding_blank = had_preceding_blank;
877                            }
878                            current_code_block.push((content.clone(), *indent));
879                            had_preceding_blank = false; // Reset after code
880                        }
881                        LineType::NestedListItem(content, indent) => {
882                            if in_code {
883                                // Switching from code to nested list
884                                blocks.push(Block::Code {
885                                    lines: current_code_block.clone(),
886                                    has_preceding_blank: code_block_has_preceding_blank,
887                                });
888                                current_code_block.clear();
889                                in_code = false;
890                            } else if in_html_block {
891                                // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
892                                blocks.push(Block::Html {
893                                    lines: current_html_block.clone(),
894                                    has_preceding_blank: html_block_has_preceding_blank,
895                                });
896                                current_html_block.clear();
897                                html_tag_stack.clear();
898                                in_html_block = false;
899                            }
900                            if !in_nested_list {
901                                // Switching from content to nested list
902                                if !current_paragraph.is_empty() {
903                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
904                                    current_paragraph.clear();
905                                }
906                                in_nested_list = true;
907                            }
908                            current_nested_list.push((content.clone(), *indent));
909                            had_preceding_blank = false; // Reset after nested list
910                        }
911                        LineType::SemanticLine(content) => {
912                            // Semantic lines are standalone - flush any current block and add as separate block
913                            if in_code {
914                                blocks.push(Block::Code {
915                                    lines: current_code_block.clone(),
916                                    has_preceding_blank: code_block_has_preceding_blank,
917                                });
918                                current_code_block.clear();
919                                in_code = false;
920                            } else if in_nested_list {
921                                blocks.push(Block::NestedList(current_nested_list.clone()));
922                                current_nested_list.clear();
923                                in_nested_list = false;
924                            } else if in_html_block {
925                                blocks.push(Block::Html {
926                                    lines: current_html_block.clone(),
927                                    has_preceding_blank: html_block_has_preceding_blank,
928                                });
929                                current_html_block.clear();
930                                html_tag_stack.clear();
931                                in_html_block = false;
932                            } else if !current_paragraph.is_empty() {
933                                blocks.push(Block::Paragraph(current_paragraph.clone()));
934                                current_paragraph.clear();
935                            }
936                            // Add semantic line as its own block
937                            blocks.push(Block::SemanticLine(content.clone()));
938                            had_preceding_blank = false; // Reset after semantic line
939                        }
940                    }
941                }
942
943                // Push remaining block
944                if in_code && !current_code_block.is_empty() {
945                    blocks.push(Block::Code {
946                        lines: current_code_block,
947                        has_preceding_blank: code_block_has_preceding_blank,
948                    });
949                } else if in_nested_list && !current_nested_list.is_empty() {
950                    blocks.push(Block::NestedList(current_nested_list));
951                } else if in_html_block && !current_html_block.is_empty() {
952                    // If we still have an unclosed HTML block, push it anyway
953                    // (malformed HTML - missing closing tag)
954                    blocks.push(Block::Html {
955                        lines: current_html_block,
956                        has_preceding_blank: html_block_has_preceding_blank,
957                    });
958                } else if !current_paragraph.is_empty() {
959                    blocks.push(Block::Paragraph(current_paragraph));
960                }
961
962                // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
963                let content_lines: Vec<String> = list_item_lines
964                    .iter()
965                    .filter_map(|line| {
966                        if let LineType::Content(s) = line {
967                            Some(s.clone())
968                        } else {
969                            None
970                        }
971                    })
972                    .collect();
973
974                // Check if we need to reflow this list item
975                // We check the combined content to see if it exceeds length limits
976                let combined_content = content_lines.join(" ").trim().to_string();
977                let full_line = format!("{marker}{combined_content}");
978
979                // Helper to check if we should reflow in normalize mode
980                let should_normalize = || {
981                    // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
982                    // DO normalize if it has plain text content that spans multiple lines
983                    let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
984                    let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
985                    let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
986                    let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
987
988                    // If we have nested lists, code blocks, or semantic lines but no paragraphs, don't normalize
989                    if (has_nested_lists || has_code_blocks || has_semantic_lines) && !has_paragraphs {
990                        return false;
991                    }
992
993                    // If we have paragraphs, check if they span multiple lines or there are multiple blocks
994                    if has_paragraphs {
995                        let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
996                        if paragraph_count > 1 {
997                            // Multiple paragraph blocks should be normalized
998                            return true;
999                        }
1000
1001                        // Single paragraph block: normalize if it has multiple content lines
1002                        if content_lines.len() > 1 {
1003                            return true;
1004                        }
1005                    }
1006
1007                    false
1008                };
1009
1010                let needs_reflow = match config.reflow_mode {
1011                    ReflowMode::Normalize => {
1012                        // Only reflow if:
1013                        // 1. The combined line would exceed the limit, OR
1014                        // 2. The list item should be normalized (has multi-line plain text)
1015                        let combined_length = self.calculate_effective_length(&full_line);
1016                        if combined_length > config.line_length.get() {
1017                            true
1018                        } else {
1019                            should_normalize()
1020                        }
1021                    }
1022                    ReflowMode::SentencePerLine => {
1023                        // Check if list item has multiple sentences
1024                        let sentences = split_into_sentences(&combined_content);
1025                        sentences.len() > 1
1026                    }
1027                    ReflowMode::Default => {
1028                        // In default mode, only reflow if any individual line exceeds limit
1029                        // Check the original lines, not the combined content
1030                        (list_start..i)
1031                            .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1032                    }
1033                };
1034
1035                if needs_reflow {
1036                    let start_range = line_index.whole_line_range(list_start + 1);
1037                    let end_line = i - 1;
1038                    let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1039                        line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1040                    } else {
1041                        line_index.whole_line_range(end_line + 1)
1042                    };
1043                    let byte_range = start_range.start..end_range.end;
1044
1045                    // Reflow each block (paragraphs only, preserve code blocks)
1046                    // When line_length = 0 (no limit), use a very large value for reflow
1047                    let reflow_line_length = if config.line_length.is_unlimited() {
1048                        usize::MAX
1049                    } else {
1050                        config.line_length.get().saturating_sub(indent_size).max(1)
1051                    };
1052                    let reflow_options = crate::utils::text_reflow::ReflowOptions {
1053                        line_length: reflow_line_length,
1054                        break_on_sentences: true,
1055                        preserve_breaks: false,
1056                        sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1057                        abbreviations: config.abbreviations.clone(),
1058                    };
1059
1060                    let mut result: Vec<String> = Vec::new();
1061                    let mut is_first_block = true;
1062
1063                    for (block_idx, block) in blocks.iter().enumerate() {
1064                        match block {
1065                            Block::Paragraph(para_lines) => {
1066                                // Split the paragraph into segments at hard break boundaries
1067                                // Each segment can be reflowed independently
1068                                let segments = split_into_segments(para_lines);
1069
1070                                for (segment_idx, segment) in segments.iter().enumerate() {
1071                                    // Check if this segment ends with a hard break and what type
1072                                    let hard_break_type = segment.last().and_then(|line| {
1073                                        let line = line.strip_suffix('\r').unwrap_or(line);
1074                                        if line.ends_with('\\') {
1075                                            Some("\\")
1076                                        } else if line.ends_with("  ") {
1077                                            Some("  ")
1078                                        } else {
1079                                            None
1080                                        }
1081                                    });
1082
1083                                    // Join and reflow the segment (removing the hard break marker for processing)
1084                                    let segment_for_reflow: Vec<String> = segment
1085                                        .iter()
1086                                        .map(|line| {
1087                                            // Strip hard break marker (2 spaces or backslash) for reflow processing
1088                                            if line.ends_with('\\') {
1089                                                line[..line.len() - 1].trim_end().to_string()
1090                                            } else if line.ends_with("  ") {
1091                                                line[..line.len() - 2].trim_end().to_string()
1092                                            } else {
1093                                                line.clone()
1094                                            }
1095                                        })
1096                                        .collect();
1097
1098                                    let segment_text = segment_for_reflow.join(" ").trim().to_string();
1099                                    if !segment_text.is_empty() {
1100                                        let reflowed =
1101                                            crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1102
1103                                        if is_first_block && segment_idx == 0 {
1104                                            // First segment of first block starts with marker
1105                                            result.push(format!("{marker}{}", reflowed[0]));
1106                                            for line in reflowed.iter().skip(1) {
1107                                                result.push(format!("{expected_indent}{line}"));
1108                                            }
1109                                            is_first_block = false;
1110                                        } else {
1111                                            // Subsequent segments
1112                                            for line in reflowed {
1113                                                result.push(format!("{expected_indent}{line}"));
1114                                            }
1115                                        }
1116
1117                                        // If this segment had a hard break, add it back to the last line
1118                                        // Preserve the original hard break format (backslash or two spaces)
1119                                        if let Some(break_marker) = hard_break_type
1120                                            && let Some(last_line) = result.last_mut()
1121                                        {
1122                                            last_line.push_str(break_marker);
1123                                        }
1124                                    }
1125                                }
1126
1127                                // Add blank line after paragraph block if there's a next block
1128                                // BUT: check if next block is a code block that doesn't want a preceding blank
1129                                if block_idx < blocks.len() - 1 {
1130                                    let next_block = &blocks[block_idx + 1];
1131                                    let should_add_blank = match next_block {
1132                                        Block::Code {
1133                                            has_preceding_blank, ..
1134                                        } => *has_preceding_blank,
1135                                        _ => true, // For all other blocks, add blank line
1136                                    };
1137                                    if should_add_blank {
1138                                        result.push(String::new());
1139                                    }
1140                                }
1141                            }
1142                            Block::Code {
1143                                lines: code_lines,
1144                                has_preceding_blank: _,
1145                            } => {
1146                                // Preserve code blocks as-is with original indentation
1147                                // NOTE: Blank line before code block is handled by the previous block
1148                                // (see paragraph block's logic above)
1149
1150                                for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1151                                    if is_first_block && idx == 0 {
1152                                        // First line of first block gets marker
1153                                        result.push(format!(
1154                                            "{marker}{}",
1155                                            " ".repeat(orig_indent - marker_len) + content
1156                                        ));
1157                                        is_first_block = false;
1158                                    } else if content.is_empty() {
1159                                        result.push(String::new());
1160                                    } else {
1161                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1162                                    }
1163                                }
1164                            }
1165                            Block::NestedList(nested_items) => {
1166                                // Preserve nested list items as-is with original indentation
1167                                if !is_first_block {
1168                                    result.push(String::new());
1169                                }
1170
1171                                for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1172                                    if is_first_block && idx == 0 {
1173                                        // First line of first block gets marker
1174                                        result.push(format!(
1175                                            "{marker}{}",
1176                                            " ".repeat(orig_indent - marker_len) + content
1177                                        ));
1178                                        is_first_block = false;
1179                                    } else if content.is_empty() {
1180                                        result.push(String::new());
1181                                    } else {
1182                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1183                                    }
1184                                }
1185
1186                                // Add blank line after nested list if there's a next block
1187                                // Check if next block is a code block that doesn't want a preceding blank
1188                                if block_idx < blocks.len() - 1 {
1189                                    let next_block = &blocks[block_idx + 1];
1190                                    let should_add_blank = match next_block {
1191                                        Block::Code {
1192                                            has_preceding_blank, ..
1193                                        } => *has_preceding_blank,
1194                                        _ => true, // For all other blocks, add blank line
1195                                    };
1196                                    if should_add_blank {
1197                                        result.push(String::new());
1198                                    }
1199                                }
1200                            }
1201                            Block::SemanticLine(content) => {
1202                                // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1203                                // Add blank line before if not first block
1204                                if !is_first_block {
1205                                    result.push(String::new());
1206                                }
1207
1208                                if is_first_block {
1209                                    // First block starts with marker
1210                                    result.push(format!("{marker}{content}"));
1211                                    is_first_block = false;
1212                                } else {
1213                                    // Subsequent blocks use expected indent
1214                                    result.push(format!("{expected_indent}{content}"));
1215                                }
1216
1217                                // Add blank line after semantic line if there's a next block
1218                                // Check if next block is a code block that doesn't want a preceding blank
1219                                if block_idx < blocks.len() - 1 {
1220                                    let next_block = &blocks[block_idx + 1];
1221                                    let should_add_blank = match next_block {
1222                                        Block::Code {
1223                                            has_preceding_blank, ..
1224                                        } => *has_preceding_blank,
1225                                        _ => true, // For all other blocks, add blank line
1226                                    };
1227                                    if should_add_blank {
1228                                        result.push(String::new());
1229                                    }
1230                                }
1231                            }
1232                            Block::Html {
1233                                lines: html_lines,
1234                                has_preceding_blank: _,
1235                            } => {
1236                                // Preserve HTML blocks exactly as-is with original indentation
1237                                // NOTE: Blank line before HTML block is handled by the previous block
1238
1239                                for (idx, line) in html_lines.iter().enumerate() {
1240                                    if is_first_block && idx == 0 {
1241                                        // First line of first block gets marker
1242                                        result.push(format!("{marker}{line}"));
1243                                        is_first_block = false;
1244                                    } else if line.is_empty() {
1245                                        // Preserve blank lines inside HTML blocks
1246                                        result.push(String::new());
1247                                    } else {
1248                                        // Preserve lines with their original content (already includes indentation)
1249                                        result.push(format!("{expected_indent}{line}"));
1250                                    }
1251                                }
1252
1253                                // Add blank line after HTML block if there's a next block
1254                                if block_idx < blocks.len() - 1 {
1255                                    let next_block = &blocks[block_idx + 1];
1256                                    let should_add_blank = match next_block {
1257                                        Block::Code {
1258                                            has_preceding_blank, ..
1259                                        } => *has_preceding_blank,
1260                                        Block::Html {
1261                                            has_preceding_blank, ..
1262                                        } => *has_preceding_blank,
1263                                        _ => true, // For all other blocks, add blank line
1264                                    };
1265                                    if should_add_blank {
1266                                        result.push(String::new());
1267                                    }
1268                                }
1269                            }
1270                        }
1271                    }
1272
1273                    let reflowed_text = result.join("\n");
1274
1275                    // Preserve trailing newline
1276                    let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1277                        format!("{reflowed_text}\n")
1278                    } else {
1279                        reflowed_text
1280                    };
1281
1282                    // Get the original text to compare
1283                    let original_text = &ctx.content[byte_range.clone()];
1284
1285                    // Only generate a warning if the replacement is different from the original
1286                    if original_text != replacement {
1287                        // Generate an appropriate message based on why reflow is needed
1288                        let message = match config.reflow_mode {
1289                            ReflowMode::SentencePerLine => {
1290                                let num_sentences = split_into_sentences(&combined_content).len();
1291                                let num_lines = content_lines.len();
1292                                if num_lines == 1 {
1293                                    // Single line with multiple sentences
1294                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1295                                } else {
1296                                    // Multiple lines - could be split sentences or mixed
1297                                    format!(
1298                                        "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1299                                    )
1300                                }
1301                            }
1302                            ReflowMode::Normalize => {
1303                                let combined_length = self.calculate_effective_length(&full_line);
1304                                if combined_length > config.line_length.get() {
1305                                    format!(
1306                                        "Line length {} exceeds {} characters",
1307                                        combined_length,
1308                                        config.line_length.get()
1309                                    )
1310                                } else {
1311                                    "Multi-line content can be normalized".to_string()
1312                                }
1313                            }
1314                            ReflowMode::Default => {
1315                                let combined_length = self.calculate_effective_length(&full_line);
1316                                format!(
1317                                    "Line length {} exceeds {} characters",
1318                                    combined_length,
1319                                    config.line_length.get()
1320                                )
1321                            }
1322                        };
1323
1324                        warnings.push(LintWarning {
1325                            rule_name: Some(self.name().to_string()),
1326                            message,
1327                            line: list_start + 1,
1328                            column: 1,
1329                            end_line: end_line + 1,
1330                            end_column: lines[end_line].len() + 1,
1331                            severity: Severity::Warning,
1332                            fix: Some(crate::rule::Fix {
1333                                range: byte_range,
1334                                replacement,
1335                            }),
1336                        });
1337                    }
1338                }
1339                continue;
1340            }
1341
1342            // Found start of a paragraph - collect all lines in it
1343            let paragraph_start = i;
1344            let mut paragraph_lines = vec![lines[i]];
1345            i += 1;
1346
1347            while i < lines.len() {
1348                let next_line = lines[i];
1349                let next_line_num = i + 1;
1350                let next_trimmed = next_line.trim();
1351
1352                // Stop at paragraph boundaries
1353                if next_trimmed.is_empty()
1354                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1355                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1356                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1357                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1358                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1359                    || (next_line_num > 0
1360                        && next_line_num <= ctx.lines.len()
1361                        && ctx.lines[next_line_num - 1].blockquote.is_some())
1362                    || next_trimmed.starts_with('#')
1363                    || TableUtils::is_potential_table_row(next_line)
1364                    || is_list_item(next_trimmed)
1365                    || is_horizontal_rule(next_trimmed)
1366                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1367                    || is_template_directive_only(next_line)
1368                {
1369                    break;
1370                }
1371
1372                // Check if the previous line ends with a hard break (2+ spaces or backslash)
1373                if i > 0 && has_hard_break(lines[i - 1]) {
1374                    // Don't include lines after hard breaks in the same paragraph
1375                    break;
1376                }
1377
1378                paragraph_lines.push(next_line);
1379                i += 1;
1380            }
1381
1382            // Combine paragraph lines into a single string for processing
1383            // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1384            let paragraph_text = paragraph_lines.join(" ");
1385
1386            // Skip reflowing if this paragraph contains definition list items
1387            // Definition lists are multi-line structures that should not be joined
1388            let contains_definition_list = paragraph_lines
1389                .iter()
1390                .any(|line| crate::utils::is_definition_list_item(line));
1391
1392            if contains_definition_list {
1393                // Don't reflow definition lists - skip this paragraph
1394                i = paragraph_start + paragraph_lines.len();
1395                continue;
1396            }
1397
1398            // Check if this paragraph needs reflowing
1399            let needs_reflow = match config.reflow_mode {
1400                ReflowMode::Normalize => {
1401                    // In normalize mode, reflow multi-line paragraphs
1402                    paragraph_lines.len() > 1
1403                }
1404                ReflowMode::SentencePerLine => {
1405                    // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1406                    // Note: we check the joined text because sentences can span multiple lines
1407                    let sentences = split_into_sentences(&paragraph_text);
1408
1409                    // Always reflow if multiple sentences on one line
1410                    if sentences.len() > 1 {
1411                        true
1412                    } else if paragraph_lines.len() > 1 {
1413                        // For single-sentence paragraphs spanning multiple lines:
1414                        // Reflow if they COULD fit on one line (respecting line-length constraint)
1415                        if config.line_length.is_unlimited() {
1416                            // No line-length constraint - always join single sentences
1417                            true
1418                        } else {
1419                            // Only join if it fits within line-length
1420                            let effective_length = self.calculate_effective_length(&paragraph_text);
1421                            effective_length <= config.line_length.get()
1422                        }
1423                    } else {
1424                        false
1425                    }
1426                }
1427                ReflowMode::Default => {
1428                    // In default mode, only reflow if lines exceed limit
1429                    paragraph_lines
1430                        .iter()
1431                        .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1432                }
1433            };
1434
1435            if needs_reflow {
1436                // Calculate byte range for this paragraph
1437                // Use whole_line_range for each line and combine
1438                let start_range = line_index.whole_line_range(paragraph_start + 1);
1439                let end_line = paragraph_start + paragraph_lines.len() - 1;
1440
1441                // For the last line, we want to preserve any trailing newline
1442                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1443                    // Last line without trailing newline - use line_text_range
1444                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1445                } else {
1446                    // Not the last line or has trailing newline - use whole_line_range
1447                    line_index.whole_line_range(end_line + 1)
1448                };
1449
1450                let byte_range = start_range.start..end_range.end;
1451
1452                // Check if the paragraph ends with a hard break and what type
1453                let hard_break_type = paragraph_lines.last().and_then(|line| {
1454                    let line = line.strip_suffix('\r').unwrap_or(line);
1455                    if line.ends_with('\\') {
1456                        Some("\\")
1457                    } else if line.ends_with("  ") {
1458                        Some("  ")
1459                    } else {
1460                        None
1461                    }
1462                });
1463
1464                // Reflow the paragraph
1465                // When line_length = 0 (no limit), use a very large value for reflow
1466                let reflow_line_length = if config.line_length.is_unlimited() {
1467                    usize::MAX
1468                } else {
1469                    config.line_length.get()
1470                };
1471                let reflow_options = crate::utils::text_reflow::ReflowOptions {
1472                    line_length: reflow_line_length,
1473                    break_on_sentences: true,
1474                    preserve_breaks: false,
1475                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1476                    abbreviations: config.abbreviations.clone(),
1477                };
1478                let mut reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
1479
1480                // If the original paragraph ended with a hard break, preserve it
1481                // Preserve the original hard break format (backslash or two spaces)
1482                if let Some(break_marker) = hard_break_type
1483                    && !reflowed.is_empty()
1484                {
1485                    let last_idx = reflowed.len() - 1;
1486                    if !has_hard_break(&reflowed[last_idx]) {
1487                        reflowed[last_idx].push_str(break_marker);
1488                    }
1489                }
1490
1491                let reflowed_text = reflowed.join("\n");
1492
1493                // Preserve trailing newline if the original paragraph had one
1494                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1495                    format!("{reflowed_text}\n")
1496                } else {
1497                    reflowed_text
1498                };
1499
1500                // Get the original text to compare
1501                let original_text = &ctx.content[byte_range.clone()];
1502
1503                // Only generate a warning if the replacement is different from the original
1504                if original_text != replacement {
1505                    // Create warning with actual fix
1506                    // In default mode, report the specific line that violates
1507                    // In normalize mode, report the whole paragraph
1508                    // In sentence-per-line mode, report the entire paragraph
1509                    let (warning_line, warning_end_line) = match config.reflow_mode {
1510                        ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1511                        ReflowMode::SentencePerLine => {
1512                            // Highlight the entire paragraph that needs reformatting
1513                            (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1514                        }
1515                        ReflowMode::Default => {
1516                            // Find the first line that exceeds the limit
1517                            let mut violating_line = paragraph_start;
1518                            for (idx, line) in paragraph_lines.iter().enumerate() {
1519                                if self.calculate_effective_length(line) > config.line_length.get() {
1520                                    violating_line = paragraph_start + idx;
1521                                    break;
1522                                }
1523                            }
1524                            (violating_line + 1, violating_line + 1)
1525                        }
1526                    };
1527
1528                    warnings.push(LintWarning {
1529                        rule_name: Some(self.name().to_string()),
1530                        message: match config.reflow_mode {
1531                            ReflowMode::Normalize => format!(
1532                                "Paragraph could be normalized to use line length of {} characters",
1533                                config.line_length.get()
1534                            ),
1535                            ReflowMode::SentencePerLine => {
1536                                let num_sentences = split_into_sentences(&paragraph_text).len();
1537                                if paragraph_lines.len() == 1 {
1538                                    // Single line with multiple sentences
1539                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1540                                } else {
1541                                    let num_lines = paragraph_lines.len();
1542                                    // Multiple lines - could be split sentences or mixed
1543                                    format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1544                                }
1545                            },
1546                            ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1547                        },
1548                        line: warning_line,
1549                        column: 1,
1550                        end_line: warning_end_line,
1551                        end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1552                        severity: Severity::Warning,
1553                        fix: Some(crate::rule::Fix {
1554                            range: byte_range,
1555                            replacement,
1556                        }),
1557                    });
1558                }
1559            }
1560        }
1561
1562        warnings
1563    }
1564
1565    /// Calculate string length based on the configured length mode
1566    fn calculate_string_length(&self, s: &str) -> usize {
1567        match self.config.length_mode {
1568            LengthMode::Chars => s.chars().count(),
1569            LengthMode::Visual => s.width(),
1570            LengthMode::Bytes => s.len(),
1571        }
1572    }
1573
1574    /// Calculate effective line length excluding unbreakable URLs
1575    fn calculate_effective_length(&self, line: &str) -> usize {
1576        if self.config.strict {
1577            // In strict mode, count everything
1578            return self.calculate_string_length(line);
1579        }
1580
1581        // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1582        let bytes = line.as_bytes();
1583        if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1584            return self.calculate_string_length(line);
1585        }
1586
1587        // More precise check for URLs and links
1588        if !line.contains("http") && !line.contains('[') {
1589            return self.calculate_string_length(line);
1590        }
1591
1592        let mut effective_line = line.to_string();
1593
1594        // First handle markdown links to avoid double-counting URLs
1595        // Pattern: [text](very-long-url) -> [text](url)
1596        if line.contains('[') && line.contains("](") {
1597            for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1598                if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1599                    && url.as_str().len() > 15
1600                {
1601                    let replacement = format!("[{}](url)", text.as_str());
1602                    effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1603                }
1604            }
1605        }
1606
1607        // Then replace bare URLs with a placeholder of reasonable length
1608        // This allows lines with long URLs to pass if the rest of the content is reasonable
1609        if effective_line.contains("http") {
1610            for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1611                let url = url_match.as_str();
1612                // Skip if this URL is already part of a markdown link we handled
1613                if !effective_line.contains(&format!("({url})")) {
1614                    // Replace URL with placeholder that represents a "reasonable" URL length
1615                    // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1616                    let placeholder = "x".repeat(15.min(url.len()));
1617                    effective_line = effective_line.replacen(url, &placeholder, 1);
1618                }
1619            }
1620        }
1621
1622        self.calculate_string_length(&effective_line)
1623    }
1624}