rumdl_lib/rules/md013_line_length/
mod.rs

//! Rule MD013: Line length
//!
//! See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::range_utils::LineIndex;
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9    IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18    extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19    split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
/// Rule MD013: reports lines that are longer than the configured limit.
///
/// The rule carries no state of its own; everything it does is driven by the
/// wrapped [`MD013Config`].
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings for this rule instance (length limit, per-element toggles, reflow options).
    pub(crate) config: MD013Config,
}
32
33impl MD013LineLength {
34    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35        Self {
36            config: MD013Config {
37                line_length: crate::types::LineLength::new(line_length),
38                code_blocks,
39                tables,
40                headings,
41                paragraphs: true, // Default to true for backwards compatibility
42                strict,
43                reflow: false,
44                reflow_mode: ReflowMode::default(),
45                length_mode: LengthMode::default(),
46                abbreviations: None,
47            },
48        }
49    }
50
51    pub fn from_config_struct(config: MD013Config) -> Self {
52        Self { config }
53    }
54
55    fn should_ignore_line(
56        &self,
57        line: &str,
58        _lines: &[&str],
59        current_line: usize,
60        ctx: &crate::lint_context::LintContext,
61    ) -> bool {
62        if self.config.strict {
63            return false;
64        }
65
66        // Quick check for common patterns before expensive regex
67        let trimmed = line.trim();
68
69        // Only skip if the entire line is a URL (quick check first)
70        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71            return true;
72        }
73
74        // Only skip if the entire line is an image reference (quick check first)
75        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76            return true;
77        }
78
79        // Only skip if the entire line is a link reference (quick check first)
80        if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81            return true;
82        }
83
84        // Code blocks with long strings (only check if in code block)
85        if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86            && !trimmed.is_empty()
87            && !line.contains(' ')
88            && !line.contains('\t')
89        {
90            return true;
91        }
92
93        false
94    }
95
96    /// Check if rule should skip based on provided config (used for inline config support)
97    fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
98        // Skip if content is empty
99        if ctx.content.is_empty() {
100            return true;
101        }
102
103        // For sentence-per-line or normalize mode, never skip based on line length
104        if config.reflow
105            && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
106        {
107            return false;
108        }
109
110        // Quick check: if total content is shorter than line limit, definitely skip
111        if ctx.content.len() <= config.line_length.get() {
112            return true;
113        }
114
115        // Skip if no line exceeds the limit
116        !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
117    }
118}
119
120impl Rule for MD013LineLength {
    /// Returns the rule identifier, `"MD013"`.
    fn name(&self) -> &'static str {
        "MD013"
    }
124
    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Line length should not be excessive"
    }
128
129    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
130        let content = ctx.content;
131
132        // Parse inline configuration FIRST so we can use effective config for should_skip
133        let inline_config = crate::inline_config::InlineConfig::from_content(content);
134        let config_override = inline_config.get_rule_config("MD013");
135
136        // Apply configuration override if present
137        let effective_config = if let Some(json_config) = config_override {
138            if let Some(obj) = json_config.as_object() {
139                let mut config = self.config.clone();
140                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
141                    config.line_length = crate::types::LineLength::new(line_length as usize);
142                }
143                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
144                    config.code_blocks = code_blocks;
145                }
146                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
147                    config.tables = tables;
148                }
149                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
150                    config.headings = headings;
151                }
152                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
153                    config.strict = strict;
154                }
155                if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
156                    config.reflow = reflow;
157                }
158                if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
159                    config.reflow_mode = match reflow_mode {
160                        "default" => ReflowMode::Default,
161                        "normalize" => ReflowMode::Normalize,
162                        "sentence-per-line" => ReflowMode::SentencePerLine,
163                        _ => ReflowMode::default(),
164                    };
165                }
166                config
167            } else {
168                self.config.clone()
169            }
170        } else {
171            self.config.clone()
172        };
173
174        // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
175        // But don't skip if we're in reflow mode with Normalize or SentencePerLine
176        if self.should_skip_with_config(ctx, &effective_config)
177            && !(effective_config.reflow
178                && (effective_config.reflow_mode == ReflowMode::Normalize
179                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
180        {
181            return Ok(Vec::new());
182        }
183
184        // Direct implementation without DocumentStructure
185        let mut warnings = Vec::new();
186
187        // Special handling: line_length = 0 means "no line length limit"
188        // Skip all line length checks, but still allow reflow if enabled
189        let skip_length_checks = effective_config.line_length.is_unlimited();
190
191        // Pre-filter lines that could be problematic to avoid processing all lines
192        let mut candidate_lines = Vec::new();
193        if !skip_length_checks {
194            for (line_idx, line_info) in ctx.lines.iter().enumerate() {
195                // Skip front matter - it should never be linted
196                if line_info.in_front_matter {
197                    continue;
198                }
199
200                // Quick length check first
201                if line_info.byte_len > effective_config.line_length.get() {
202                    candidate_lines.push(line_idx);
203                }
204            }
205        }
206
207        // If no candidate lines and not in normalize or sentence-per-line mode, early return
208        if candidate_lines.is_empty()
209            && !(effective_config.reflow
210                && (effective_config.reflow_mode == ReflowMode::Normalize
211                    || effective_config.reflow_mode == ReflowMode::SentencePerLine))
212        {
213            return Ok(warnings);
214        }
215
216        // Use ctx.lines if available for better performance
217        let lines: Vec<&str> = if !ctx.lines.is_empty() {
218            ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
219        } else {
220            content.lines().collect()
221        };
222
223        // Create a quick lookup set for heading lines
224        // We need this for both the heading skip check AND the paragraphs check
225        let heading_lines_set: std::collections::HashSet<usize> = ctx
226            .lines
227            .iter()
228            .enumerate()
229            .filter(|(_, line)| line.heading.is_some())
230            .map(|(idx, _)| idx + 1)
231            .collect();
232
233        // Use pre-computed table blocks from context
234        // We need this for both the table skip check AND the paragraphs check
235        let table_blocks = &ctx.table_blocks;
236        let mut table_lines_set = std::collections::HashSet::new();
237        for table in table_blocks {
238            table_lines_set.insert(table.header_line + 1);
239            table_lines_set.insert(table.delimiter_line + 1);
240            for &line in &table.content_lines {
241                table_lines_set.insert(line + 1);
242            }
243        }
244
245        // Process candidate lines for line length checks
246        for &line_idx in &candidate_lines {
247            let line_number = line_idx + 1;
248            let line = lines[line_idx];
249
250            // Calculate effective length excluding unbreakable URLs
251            let effective_length = self.calculate_effective_length(line);
252
253            // Use single line length limit for all content
254            let line_limit = effective_config.line_length.get();
255
256            // Skip short lines immediately (double-check after effective length calculation)
257            if effective_length <= line_limit {
258                continue;
259            }
260
261            // Skip mkdocstrings blocks (already handled by LintContext)
262            if ctx.lines[line_idx].in_mkdocstrings {
263                continue;
264            }
265
266            // Skip various block types efficiently
267            if !effective_config.strict {
268                // Skip setext heading underlines
269                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
270                    continue;
271                }
272
273                // Skip block elements according to config flags
274                // The flags mean: true = check these elements, false = skip these elements
275                // So we skip when the flag is FALSE and the line is in that element type
276                if (!effective_config.headings && heading_lines_set.contains(&line_number))
277                    || (!effective_config.code_blocks
278                        && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
279                    || (!effective_config.tables && table_lines_set.contains(&line_number))
280                    || ctx.lines[line_number - 1].blockquote.is_some()
281                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
282                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
283                    || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
284                    || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
285                    || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
286                {
287                    continue;
288                }
289
290                // Check if this is a paragraph/regular text line
291                // If paragraphs = false, skip lines that are NOT in special blocks
292                if !effective_config.paragraphs {
293                    let is_special_block = heading_lines_set.contains(&line_number)
294                        || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
295                        || table_lines_set.contains(&line_number)
296                        || ctx.lines[line_number - 1].blockquote.is_some()
297                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
298                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
299                        || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
300                        || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
301                        || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment);
302
303                    // Skip regular paragraph text when paragraphs = false
304                    if !is_special_block {
305                        continue;
306                    }
307                }
308
309                // Skip lines that are only a URL, image ref, or link ref
310                if self.should_ignore_line(line, &lines, line_idx, ctx) {
311                    continue;
312                }
313            }
314
315            // In sentence-per-line mode, check if this is a single long sentence
316            // If so, emit a warning without a fix (user must manually rephrase)
317            if effective_config.reflow_mode == ReflowMode::SentencePerLine {
318                let sentences = split_into_sentences(line.trim());
319                if sentences.len() == 1 {
320                    // Single sentence that's too long - warn but don't auto-fix
321                    let message = format!("Line length {effective_length} exceeds {line_limit} characters");
322
323                    let (start_line, start_col, end_line, end_col) =
324                        calculate_excess_range(line_number, line, line_limit);
325
326                    warnings.push(LintWarning {
327                        rule_name: Some(self.name().to_string()),
328                        message,
329                        line: start_line,
330                        column: start_col,
331                        end_line,
332                        end_column: end_col,
333                        severity: Severity::Warning,
334                        fix: None, // No auto-fix for long single sentences
335                    });
336                    continue;
337                }
338                // Multiple sentences will be handled by paragraph-based reflow
339                continue;
340            }
341
342            // Don't provide fix for individual lines when reflow is enabled
343            // Paragraph-based fixes will be handled separately
344            let fix = None;
345
346            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
347
348            // Calculate precise character range for the excess portion
349            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
350
351            warnings.push(LintWarning {
352                rule_name: Some(self.name().to_string()),
353                message,
354                line: start_line,
355                column: start_col,
356                end_line,
357                end_column: end_col,
358                severity: Severity::Warning,
359                fix,
360            });
361        }
362
363        // If reflow is enabled, generate paragraph-based fixes
364        if effective_config.reflow {
365            let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
366            // Merge paragraph warnings with line warnings, removing duplicates
367            for pw in paragraph_warnings {
368                // Remove any line warnings that overlap with this paragraph
369                warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
370                warnings.push(pw);
371            }
372        }
373
374        Ok(warnings)
375    }
376
377    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
378        // For CLI usage, apply fixes from warnings
379        // LSP will use the warning-based fixes directly
380        let warnings = self.check(ctx)?;
381
382        // If there are no fixes, return content unchanged
383        if !warnings.iter().any(|w| w.fix.is_some()) {
384            return Ok(ctx.content.to_string());
385        }
386
387        // Apply warning-based fixes
388        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
389            .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
390    }
391
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
395
    /// Returns the category this rule is filed under (`RuleCategory::Whitespace`).
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
399
    /// Reports whether the rule can be skipped for `ctx`.
    ///
    /// This evaluates the rule's own `self.config`; inline overrides in the document
    /// are not consulted here (`check` re-evaluates with the effective config).
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        self.should_skip_with_config(ctx, &self.config)
    }
403
404    fn default_config_section(&self) -> Option<(String, toml::Value)> {
405        let default_config = MD013Config::default();
406        let json_value = serde_json::to_value(&default_config).ok()?;
407        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
408
409        if let toml::Value::Table(table) = toml_value {
410            if !table.is_empty() {
411                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
412            } else {
413                None
414            }
415        } else {
416            None
417        }
418    }
419
420    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
421        let mut aliases = std::collections::HashMap::new();
422        aliases.insert("enable_reflow".to_string(), "reflow".to_string());
423        Some(aliases)
424    }
425
426    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
427    where
428        Self: Sized,
429    {
430        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
431        // Use global line_length if rule-specific config still has default value
432        if rule_config.line_length.get() == 80 {
433            rule_config.line_length = config.global.line_length;
434        }
435        Box::new(Self::from_config_struct(rule_config))
436    }
437}
438
439impl MD013LineLength {
440    /// Generate paragraph-based fixes
441    fn generate_paragraph_fixes(
442        &self,
443        ctx: &crate::lint_context::LintContext,
444        config: &MD013Config,
445        lines: &[&str],
446    ) -> Vec<LintWarning> {
447        let mut warnings = Vec::new();
448        let line_index = LineIndex::new(ctx.content);
449
450        let mut i = 0;
451        while i < lines.len() {
452            let line_num = i + 1;
453
454            // Skip special structures
455            let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
456                info.in_code_block
457                    || info.in_front_matter
458                    || info.in_html_block
459                    || info.in_html_comment
460                    || info.in_esm_block
461                    || info.in_jsx_expression
462                    || info.in_mdx_comment
463            });
464
465            if should_skip_due_to_line_info
466                || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
467                || lines[i].trim().starts_with('#')
468                || TableUtils::is_potential_table_row(lines[i])
469                || lines[i].trim().is_empty()
470                || is_horizontal_rule(lines[i].trim())
471                || is_template_directive_only(lines[i])
472            {
473                i += 1;
474                continue;
475            }
476
477            // Helper function to detect semantic line markers
478            let is_semantic_line = |content: &str| -> bool {
479                let trimmed = content.trim_start();
480                let semantic_markers = [
481                    "NOTE:",
482                    "WARNING:",
483                    "IMPORTANT:",
484                    "CAUTION:",
485                    "TIP:",
486                    "DANGER:",
487                    "HINT:",
488                    "INFO:",
489                ];
490                semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
491            };
492
493            // Helper function to detect fence markers (opening or closing)
494            let is_fence_marker = |content: &str| -> bool {
495                let trimmed = content.trim_start();
496                trimmed.starts_with("```") || trimmed.starts_with("~~~")
497            };
498
499            // Check if this is a list item - handle it specially
500            let trimmed = lines[i].trim();
501            if is_list_item(trimmed) {
502                // Collect the entire list item including continuation lines
503                let list_start = i;
504                let (marker, first_content) = extract_list_marker_and_content(lines[i]);
505                let marker_len = marker.len();
506
507                // Track lines and their types (content, code block, fence, nested list)
508                #[derive(Clone)]
509                enum LineType {
510                    Content(String),
511                    CodeBlock(String, usize),      // content and original indent
512                    NestedListItem(String, usize), // full line content and original indent
513                    SemanticLine(String),          // Lines starting with NOTE:, WARNING:, etc that should stay separate
514                    Empty,
515                }
516
517                let mut actual_indent: Option<usize> = None;
518                let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
519                i += 1;
520
521                // Collect continuation lines using ctx.lines for metadata
522                while i < lines.len() {
523                    let line_info = &ctx.lines[i];
524
525                    // Use pre-computed is_blank from ctx
526                    if line_info.is_blank {
527                        // Empty line - check if next line is indented (part of list item)
528                        if i + 1 < lines.len() {
529                            let next_info = &ctx.lines[i + 1];
530
531                            // Check if next line is indented enough to be continuation
532                            if !next_info.is_blank && next_info.indent >= marker_len {
533                                // This blank line is between paragraphs/blocks in the list item
534                                list_item_lines.push(LineType::Empty);
535                                i += 1;
536                                continue;
537                            }
538                        }
539                        // No indented line after blank, end of list item
540                        break;
541                    }
542
543                    // Use pre-computed indent from ctx
544                    let indent = line_info.indent;
545
546                    // Valid continuation must be indented at least marker_len
547                    if indent >= marker_len {
548                        let trimmed = line_info.content(ctx.content).trim();
549
550                        // Use pre-computed in_code_block from ctx
551                        if line_info.in_code_block {
552                            list_item_lines.push(LineType::CodeBlock(
553                                line_info.content(ctx.content)[indent..].to_string(),
554                                indent,
555                            ));
556                            i += 1;
557                            continue;
558                        }
559
560                        // Check if this is a SIBLING list item (breaks parent)
561                        // Nested lists are indented >= marker_len and are PART of the parent item
562                        // Siblings are at indent < marker_len (at or before parent marker)
563                        if is_list_item(trimmed) && indent < marker_len {
564                            // This is a sibling item at same or higher level - end parent item
565                            break;
566                        }
567
568                        // Check if this is a NESTED list item marker
569                        // Nested lists should be processed separately UNLESS they're part of a
570                        // multi-paragraph list item (indicated by a blank line before them OR
571                        // it's a continuation of an already-started nested list)
572                        if is_list_item(trimmed) && indent >= marker_len {
573                            // Check if there was a blank line before this (multi-paragraph context)
574                            let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
575
576                            // Check if we've already seen nested list content (another nested item)
577                            let has_nested_content = list_item_lines.iter().any(|line| {
578                                matches!(line, LineType::Content(c) if is_list_item(c.trim()))
579                                    || matches!(line, LineType::NestedListItem(_, _))
580                            });
581
582                            if !has_blank_before && !has_nested_content {
583                                // Single-paragraph context with no prior nested items: starts a new item
584                                // End parent collection; nested list will be processed next
585                                break;
586                            }
587                            // else: multi-paragraph context or continuation of nested list, keep collecting
588                            // Mark this as a nested list item to preserve its structure
589                            list_item_lines.push(LineType::NestedListItem(
590                                line_info.content(ctx.content)[indent..].to_string(),
591                                indent,
592                            ));
593                            i += 1;
594                            continue;
595                        }
596
597                        // Normal continuation: marker_len to marker_len+3
598                        if indent <= marker_len + 3 {
599                            // Set actual_indent from first non-code continuation if not set
600                            if actual_indent.is_none() {
601                                actual_indent = Some(indent);
602                            }
603
604                            // Extract content (remove indentation and trailing whitespace)
605                            // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
606                            // See: https://github.com/rvben/rumdl/issues/76
607                            let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
608
609                            // Check if this is a fence marker (opening or closing)
610                            // These should be treated as code block lines, not paragraph content
611                            if is_fence_marker(&content) {
612                                list_item_lines.push(LineType::CodeBlock(content, indent));
613                            }
614                            // Check if this is a semantic line (NOTE:, WARNING:, etc.)
615                            else if is_semantic_line(&content) {
616                                list_item_lines.push(LineType::SemanticLine(content));
617                            } else {
618                                list_item_lines.push(LineType::Content(content));
619                            }
620                            i += 1;
621                        } else {
622                            // indent >= marker_len + 4: indented code block
623                            list_item_lines.push(LineType::CodeBlock(
624                                line_info.content(ctx.content)[indent..].to_string(),
625                                indent,
626                            ));
627                            i += 1;
628                        }
629                    } else {
630                        // Not indented enough, end of list item
631                        break;
632                    }
633                }
634
635                // Use detected indent or fallback to marker length
636                let indent_size = actual_indent.unwrap_or(marker_len);
637                let expected_indent = " ".repeat(indent_size);
638
639                // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
640                #[derive(Clone)]
641                enum Block {
642                    Paragraph(Vec<String>),
643                    Code {
644                        lines: Vec<(String, usize)>, // (content, indent) pairs
645                        has_preceding_blank: bool,   // Whether there was a blank line before this block
646                    },
647                    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
648                    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
649                    Html {
650                        lines: Vec<String>,        // HTML content preserved exactly as-is
651                        has_preceding_blank: bool, // Whether there was a blank line before this block
652                    },
653                }
654
655                // HTML tag detection helpers
656                // Block-level HTML tags that should trigger HTML block detection
657                const BLOCK_LEVEL_TAGS: &[&str] = &[
658                    "div",
659                    "details",
660                    "summary",
661                    "section",
662                    "article",
663                    "header",
664                    "footer",
665                    "nav",
666                    "aside",
667                    "main",
668                    "table",
669                    "thead",
670                    "tbody",
671                    "tfoot",
672                    "tr",
673                    "td",
674                    "th",
675                    "ul",
676                    "ol",
677                    "li",
678                    "dl",
679                    "dt",
680                    "dd",
681                    "pre",
682                    "blockquote",
683                    "figure",
684                    "figcaption",
685                    "form",
686                    "fieldset",
687                    "legend",
688                    "hr",
689                    "p",
690                    "h1",
691                    "h2",
692                    "h3",
693                    "h4",
694                    "h5",
695                    "h6",
696                    "style",
697                    "script",
698                    "noscript",
699                ];
700
701                fn is_block_html_opening_tag(line: &str) -> Option<String> {
702                    let trimmed = line.trim();
703
704                    // Check for HTML comments
705                    if trimmed.starts_with("<!--") {
706                        return Some("!--".to_string());
707                    }
708
709                    // Check for opening tags
710                    if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
711                        // Extract tag name from <tagname ...> or <tagname>
712                        let after_bracket = &trimmed[1..];
713                        if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
714                            let tag_name = after_bracket[..end].to_lowercase();
715
716                            // Only treat as block if it's a known block-level tag
717                            if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
718                                return Some(tag_name);
719                            }
720                        }
721                    }
722                    None
723                }
724
725                fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
726                    let trimmed = line.trim();
727
728                    // Special handling for HTML comments
729                    if tag_name == "!--" {
730                        return trimmed.ends_with("-->");
731                    }
732
733                    // Check for closing tags: </tagname> or </tagname ...>
734                    trimmed.starts_with(&format!("</{tag_name}>"))
735                        || trimmed.starts_with(&format!("</{tag_name}  "))
736                        || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
737                }
738
739                fn is_self_closing_tag(line: &str) -> bool {
740                    let trimmed = line.trim();
741                    trimmed.ends_with("/>")
742                }
743
744                let mut blocks: Vec<Block> = Vec::new();
745                let mut current_paragraph: Vec<String> = Vec::new();
746                let mut current_code_block: Vec<(String, usize)> = Vec::new();
747                let mut current_nested_list: Vec<(String, usize)> = Vec::new();
748                let mut current_html_block: Vec<String> = Vec::new();
749                let mut html_tag_stack: Vec<String> = Vec::new();
750                let mut in_code = false;
751                let mut in_nested_list = false;
752                let mut in_html_block = false;
753                let mut had_preceding_blank = false; // Track if we just saw an empty line
754                let mut code_block_has_preceding_blank = false; // Track blank before current code block
755                let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
756
757                for line in &list_item_lines {
758                    match line {
759                        LineType::Empty => {
760                            if in_code {
761                                current_code_block.push((String::new(), 0));
762                            } else if in_nested_list {
763                                current_nested_list.push((String::new(), 0));
764                            } else if in_html_block {
765                                // Allow blank lines inside HTML blocks
766                                current_html_block.push(String::new());
767                            } else if !current_paragraph.is_empty() {
768                                blocks.push(Block::Paragraph(current_paragraph.clone()));
769                                current_paragraph.clear();
770                            }
771                            // Mark that we saw a blank line
772                            had_preceding_blank = true;
773                        }
774                        LineType::Content(content) => {
775                            // Check if we're currently in an HTML block
776                            if in_html_block {
777                                current_html_block.push(content.clone());
778
779                                // Check if this line closes any open HTML tags
780                                if let Some(last_tag) = html_tag_stack.last() {
781                                    if is_html_closing_tag(content, last_tag) {
782                                        html_tag_stack.pop();
783
784                                        // If stack is empty, HTML block is complete
785                                        if html_tag_stack.is_empty() {
786                                            blocks.push(Block::Html {
787                                                lines: current_html_block.clone(),
788                                                has_preceding_blank: html_block_has_preceding_blank,
789                                            });
790                                            current_html_block.clear();
791                                            in_html_block = false;
792                                        }
793                                    } else if let Some(new_tag) = is_block_html_opening_tag(content) {
794                                        // Nested opening tag within HTML block
795                                        if !is_self_closing_tag(content) {
796                                            html_tag_stack.push(new_tag);
797                                        }
798                                    }
799                                }
800                                had_preceding_blank = false;
801                            } else {
802                                // Not in HTML block - check if this line starts one
803                                if let Some(tag_name) = is_block_html_opening_tag(content) {
804                                    // Flush current paragraph before starting HTML block
805                                    if in_code {
806                                        blocks.push(Block::Code {
807                                            lines: current_code_block.clone(),
808                                            has_preceding_blank: code_block_has_preceding_blank,
809                                        });
810                                        current_code_block.clear();
811                                        in_code = false;
812                                    } else if in_nested_list {
813                                        blocks.push(Block::NestedList(current_nested_list.clone()));
814                                        current_nested_list.clear();
815                                        in_nested_list = false;
816                                    } else if !current_paragraph.is_empty() {
817                                        blocks.push(Block::Paragraph(current_paragraph.clone()));
818                                        current_paragraph.clear();
819                                    }
820
821                                    // Start new HTML block
822                                    in_html_block = true;
823                                    html_block_has_preceding_blank = had_preceding_blank;
824                                    current_html_block.push(content.clone());
825
826                                    // Check if it's self-closing or needs a closing tag
827                                    if is_self_closing_tag(content) {
828                                        // Self-closing tag - complete the HTML block immediately
829                                        blocks.push(Block::Html {
830                                            lines: current_html_block.clone(),
831                                            has_preceding_blank: html_block_has_preceding_blank,
832                                        });
833                                        current_html_block.clear();
834                                        in_html_block = false;
835                                    } else {
836                                        // Regular opening tag - push to stack
837                                        html_tag_stack.push(tag_name);
838                                    }
839                                } else {
840                                    // Regular content line - add to paragraph
841                                    if in_code {
842                                        // Switching from code to content
843                                        blocks.push(Block::Code {
844                                            lines: current_code_block.clone(),
845                                            has_preceding_blank: code_block_has_preceding_blank,
846                                        });
847                                        current_code_block.clear();
848                                        in_code = false;
849                                    } else if in_nested_list {
850                                        // Switching from nested list to content
851                                        blocks.push(Block::NestedList(current_nested_list.clone()));
852                                        current_nested_list.clear();
853                                        in_nested_list = false;
854                                    }
855                                    current_paragraph.push(content.clone());
856                                }
857                                had_preceding_blank = false; // Reset after content
858                            }
859                        }
860                        LineType::CodeBlock(content, indent) => {
861                            if in_nested_list {
862                                // Switching from nested list to code
863                                blocks.push(Block::NestedList(current_nested_list.clone()));
864                                current_nested_list.clear();
865                                in_nested_list = false;
866                            } else if in_html_block {
867                                // Switching from HTML block to code (shouldn't happen normally, but handle it)
868                                blocks.push(Block::Html {
869                                    lines: current_html_block.clone(),
870                                    has_preceding_blank: html_block_has_preceding_blank,
871                                });
872                                current_html_block.clear();
873                                html_tag_stack.clear();
874                                in_html_block = false;
875                            }
876                            if !in_code {
877                                // Switching from content to code
878                                if !current_paragraph.is_empty() {
879                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
880                                    current_paragraph.clear();
881                                }
882                                in_code = true;
883                                // Record whether there was a blank line before this code block
884                                code_block_has_preceding_blank = had_preceding_blank;
885                            }
886                            current_code_block.push((content.clone(), *indent));
887                            had_preceding_blank = false; // Reset after code
888                        }
889                        LineType::NestedListItem(content, indent) => {
890                            if in_code {
891                                // Switching from code to nested list
892                                blocks.push(Block::Code {
893                                    lines: current_code_block.clone(),
894                                    has_preceding_blank: code_block_has_preceding_blank,
895                                });
896                                current_code_block.clear();
897                                in_code = false;
898                            } else if in_html_block {
899                                // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
900                                blocks.push(Block::Html {
901                                    lines: current_html_block.clone(),
902                                    has_preceding_blank: html_block_has_preceding_blank,
903                                });
904                                current_html_block.clear();
905                                html_tag_stack.clear();
906                                in_html_block = false;
907                            }
908                            if !in_nested_list {
909                                // Switching from content to nested list
910                                if !current_paragraph.is_empty() {
911                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
912                                    current_paragraph.clear();
913                                }
914                                in_nested_list = true;
915                            }
916                            current_nested_list.push((content.clone(), *indent));
917                            had_preceding_blank = false; // Reset after nested list
918                        }
919                        LineType::SemanticLine(content) => {
920                            // Semantic lines are standalone - flush any current block and add as separate block
921                            if in_code {
922                                blocks.push(Block::Code {
923                                    lines: current_code_block.clone(),
924                                    has_preceding_blank: code_block_has_preceding_blank,
925                                });
926                                current_code_block.clear();
927                                in_code = false;
928                            } else if in_nested_list {
929                                blocks.push(Block::NestedList(current_nested_list.clone()));
930                                current_nested_list.clear();
931                                in_nested_list = false;
932                            } else if in_html_block {
933                                blocks.push(Block::Html {
934                                    lines: current_html_block.clone(),
935                                    has_preceding_blank: html_block_has_preceding_blank,
936                                });
937                                current_html_block.clear();
938                                html_tag_stack.clear();
939                                in_html_block = false;
940                            } else if !current_paragraph.is_empty() {
941                                blocks.push(Block::Paragraph(current_paragraph.clone()));
942                                current_paragraph.clear();
943                            }
944                            // Add semantic line as its own block
945                            blocks.push(Block::SemanticLine(content.clone()));
946                            had_preceding_blank = false; // Reset after semantic line
947                        }
948                    }
949                }
950
951                // Push remaining block
952                if in_code && !current_code_block.is_empty() {
953                    blocks.push(Block::Code {
954                        lines: current_code_block,
955                        has_preceding_blank: code_block_has_preceding_blank,
956                    });
957                } else if in_nested_list && !current_nested_list.is_empty() {
958                    blocks.push(Block::NestedList(current_nested_list));
959                } else if in_html_block && !current_html_block.is_empty() {
960                    // If we still have an unclosed HTML block, push it anyway
961                    // (malformed HTML - missing closing tag)
962                    blocks.push(Block::Html {
963                        lines: current_html_block,
964                        has_preceding_blank: html_block_has_preceding_blank,
965                    });
966                } else if !current_paragraph.is_empty() {
967                    blocks.push(Block::Paragraph(current_paragraph));
968                }
969
970                // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
971                let content_lines: Vec<String> = list_item_lines
972                    .iter()
973                    .filter_map(|line| {
974                        if let LineType::Content(s) = line {
975                            Some(s.clone())
976                        } else {
977                            None
978                        }
979                    })
980                    .collect();
981
982                // Check if we need to reflow this list item
983                // We check the combined content to see if it exceeds length limits
984                let combined_content = content_lines.join(" ").trim().to_string();
985                let full_line = format!("{marker}{combined_content}");
986
987                // Helper to check if we should reflow in normalize mode
988                let should_normalize = || {
989                    // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
990                    // DO normalize if it has plain text content that spans multiple lines
991                    let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
992                    let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
993                    let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
994                    let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
995
996                    // If we have nested lists, code blocks, or semantic lines but no paragraphs, don't normalize
997                    if (has_nested_lists || has_code_blocks || has_semantic_lines) && !has_paragraphs {
998                        return false;
999                    }
1000
1001                    // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1002                    if has_paragraphs {
1003                        let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1004                        if paragraph_count > 1 {
1005                            // Multiple paragraph blocks should be normalized
1006                            return true;
1007                        }
1008
1009                        // Single paragraph block: normalize if it has multiple content lines
1010                        if content_lines.len() > 1 {
1011                            return true;
1012                        }
1013                    }
1014
1015                    false
1016                };
1017
1018                let needs_reflow = match config.reflow_mode {
1019                    ReflowMode::Normalize => {
1020                        // Only reflow if:
1021                        // 1. The combined line would exceed the limit, OR
1022                        // 2. The list item should be normalized (has multi-line plain text)
1023                        let combined_length = self.calculate_effective_length(&full_line);
1024                        if combined_length > config.line_length.get() {
1025                            true
1026                        } else {
1027                            should_normalize()
1028                        }
1029                    }
1030                    ReflowMode::SentencePerLine => {
1031                        // Check if list item has multiple sentences
1032                        let sentences = split_into_sentences(&combined_content);
1033                        sentences.len() > 1
1034                    }
1035                    ReflowMode::Default => {
1036                        // In default mode, only reflow if any individual line exceeds limit
1037                        // Check the original lines, not the combined content
1038                        (list_start..i)
1039                            .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1040                    }
1041                };
1042
1043                if needs_reflow {
1044                    let start_range = line_index.whole_line_range(list_start + 1);
1045                    let end_line = i - 1;
1046                    let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1047                        line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1048                    } else {
1049                        line_index.whole_line_range(end_line + 1)
1050                    };
1051                    let byte_range = start_range.start..end_range.end;
1052
1053                    // Reflow each block (paragraphs only, preserve code blocks)
1054                    // When line_length = 0 (no limit), use a very large value for reflow
1055                    let reflow_line_length = if config.line_length.is_unlimited() {
1056                        usize::MAX
1057                    } else {
1058                        config.line_length.get().saturating_sub(indent_size).max(1)
1059                    };
1060                    let reflow_options = crate::utils::text_reflow::ReflowOptions {
1061                        line_length: reflow_line_length,
1062                        break_on_sentences: true,
1063                        preserve_breaks: false,
1064                        sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1065                        abbreviations: config.abbreviations.clone(),
1066                    };
1067
1068                    let mut result: Vec<String> = Vec::new();
1069                    let mut is_first_block = true;
1070
1071                    for (block_idx, block) in blocks.iter().enumerate() {
1072                        match block {
1073                            Block::Paragraph(para_lines) => {
1074                                // Split the paragraph into segments at hard break boundaries
1075                                // Each segment can be reflowed independently
1076                                let segments = split_into_segments(para_lines);
1077
1078                                for (segment_idx, segment) in segments.iter().enumerate() {
1079                                    // Check if this segment ends with a hard break and what type
1080                                    let hard_break_type = segment.last().and_then(|line| {
1081                                        let line = line.strip_suffix('\r').unwrap_or(line);
1082                                        if line.ends_with('\\') {
1083                                            Some("\\")
1084                                        } else if line.ends_with("  ") {
1085                                            Some("  ")
1086                                        } else {
1087                                            None
1088                                        }
1089                                    });
1090
1091                                    // Join and reflow the segment (removing the hard break marker for processing)
1092                                    let segment_for_reflow: Vec<String> = segment
1093                                        .iter()
1094                                        .map(|line| {
1095                                            // Strip hard break marker (2 spaces or backslash) for reflow processing
1096                                            if line.ends_with('\\') {
1097                                                line[..line.len() - 1].trim_end().to_string()
1098                                            } else if line.ends_with("  ") {
1099                                                line[..line.len() - 2].trim_end().to_string()
1100                                            } else {
1101                                                line.clone()
1102                                            }
1103                                        })
1104                                        .collect();
1105
1106                                    let segment_text = segment_for_reflow.join(" ").trim().to_string();
1107                                    if !segment_text.is_empty() {
1108                                        let reflowed =
1109                                            crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1110
1111                                        if is_first_block && segment_idx == 0 {
1112                                            // First segment of first block starts with marker
1113                                            result.push(format!("{marker}{}", reflowed[0]));
1114                                            for line in reflowed.iter().skip(1) {
1115                                                result.push(format!("{expected_indent}{line}"));
1116                                            }
1117                                            is_first_block = false;
1118                                        } else {
1119                                            // Subsequent segments
1120                                            for line in reflowed {
1121                                                result.push(format!("{expected_indent}{line}"));
1122                                            }
1123                                        }
1124
1125                                        // If this segment had a hard break, add it back to the last line
1126                                        // Preserve the original hard break format (backslash or two spaces)
1127                                        if let Some(break_marker) = hard_break_type
1128                                            && let Some(last_line) = result.last_mut()
1129                                        {
1130                                            last_line.push_str(break_marker);
1131                                        }
1132                                    }
1133                                }
1134
1135                                // Add blank line after paragraph block if there's a next block
1136                                // BUT: check if next block is a code block that doesn't want a preceding blank
1137                                if block_idx < blocks.len() - 1 {
1138                                    let next_block = &blocks[block_idx + 1];
1139                                    let should_add_blank = match next_block {
1140                                        Block::Code {
1141                                            has_preceding_blank, ..
1142                                        } => *has_preceding_blank,
1143                                        _ => true, // For all other blocks, add blank line
1144                                    };
1145                                    if should_add_blank {
1146                                        result.push(String::new());
1147                                    }
1148                                }
1149                            }
1150                            Block::Code {
1151                                lines: code_lines,
1152                                has_preceding_blank: _,
1153                            } => {
1154                                // Preserve code blocks as-is with original indentation
1155                                // NOTE: Blank line before code block is handled by the previous block
1156                                // (see paragraph block's logic above)
1157
1158                                for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1159                                    if is_first_block && idx == 0 {
1160                                        // First line of first block gets marker
1161                                        result.push(format!(
1162                                            "{marker}{}",
1163                                            " ".repeat(orig_indent - marker_len) + content
1164                                        ));
1165                                        is_first_block = false;
1166                                    } else if content.is_empty() {
1167                                        result.push(String::new());
1168                                    } else {
1169                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1170                                    }
1171                                }
1172                            }
1173                            Block::NestedList(nested_items) => {
1174                                // Preserve nested list items as-is with original indentation
1175                                if !is_first_block {
1176                                    result.push(String::new());
1177                                }
1178
1179                                for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1180                                    if is_first_block && idx == 0 {
1181                                        // First line of first block gets marker
1182                                        result.push(format!(
1183                                            "{marker}{}",
1184                                            " ".repeat(orig_indent - marker_len) + content
1185                                        ));
1186                                        is_first_block = false;
1187                                    } else if content.is_empty() {
1188                                        result.push(String::new());
1189                                    } else {
1190                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1191                                    }
1192                                }
1193
1194                                // Add blank line after nested list if there's a next block
1195                                // Check if next block is a code block that doesn't want a preceding blank
1196                                if block_idx < blocks.len() - 1 {
1197                                    let next_block = &blocks[block_idx + 1];
1198                                    let should_add_blank = match next_block {
1199                                        Block::Code {
1200                                            has_preceding_blank, ..
1201                                        } => *has_preceding_blank,
1202                                        _ => true, // For all other blocks, add blank line
1203                                    };
1204                                    if should_add_blank {
1205                                        result.push(String::new());
1206                                    }
1207                                }
1208                            }
1209                            Block::SemanticLine(content) => {
1210                                // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1211                                // Add blank line before if not first block
1212                                if !is_first_block {
1213                                    result.push(String::new());
1214                                }
1215
1216                                if is_first_block {
1217                                    // First block starts with marker
1218                                    result.push(format!("{marker}{content}"));
1219                                    is_first_block = false;
1220                                } else {
1221                                    // Subsequent blocks use expected indent
1222                                    result.push(format!("{expected_indent}{content}"));
1223                                }
1224
1225                                // Add blank line after semantic line if there's a next block
1226                                // Check if next block is a code block that doesn't want a preceding blank
1227                                if block_idx < blocks.len() - 1 {
1228                                    let next_block = &blocks[block_idx + 1];
1229                                    let should_add_blank = match next_block {
1230                                        Block::Code {
1231                                            has_preceding_blank, ..
1232                                        } => *has_preceding_blank,
1233                                        _ => true, // For all other blocks, add blank line
1234                                    };
1235                                    if should_add_blank {
1236                                        result.push(String::new());
1237                                    }
1238                                }
1239                            }
1240                            Block::Html {
1241                                lines: html_lines,
1242                                has_preceding_blank: _,
1243                            } => {
1244                                // Preserve HTML blocks exactly as-is with original indentation
1245                                // NOTE: Blank line before HTML block is handled by the previous block
1246
1247                                for (idx, line) in html_lines.iter().enumerate() {
1248                                    if is_first_block && idx == 0 {
1249                                        // First line of first block gets marker
1250                                        result.push(format!("{marker}{line}"));
1251                                        is_first_block = false;
1252                                    } else if line.is_empty() {
1253                                        // Preserve blank lines inside HTML blocks
1254                                        result.push(String::new());
1255                                    } else {
1256                                        // Preserve lines with their original content (already includes indentation)
1257                                        result.push(format!("{expected_indent}{line}"));
1258                                    }
1259                                }
1260
1261                                // Add blank line after HTML block if there's a next block
1262                                if block_idx < blocks.len() - 1 {
1263                                    let next_block = &blocks[block_idx + 1];
1264                                    let should_add_blank = match next_block {
1265                                        Block::Code {
1266                                            has_preceding_blank, ..
1267                                        } => *has_preceding_blank,
1268                                        Block::Html {
1269                                            has_preceding_blank, ..
1270                                        } => *has_preceding_blank,
1271                                        _ => true, // For all other blocks, add blank line
1272                                    };
1273                                    if should_add_blank {
1274                                        result.push(String::new());
1275                                    }
1276                                }
1277                            }
1278                        }
1279                    }
1280
1281                    let reflowed_text = result.join("\n");
1282
1283                    // Preserve trailing newline
1284                    let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1285                        format!("{reflowed_text}\n")
1286                    } else {
1287                        reflowed_text
1288                    };
1289
1290                    // Get the original text to compare
1291                    let original_text = &ctx.content[byte_range.clone()];
1292
1293                    // Only generate a warning if the replacement is different from the original
1294                    if original_text != replacement {
1295                        // Generate an appropriate message based on why reflow is needed
1296                        let message = match config.reflow_mode {
1297                            ReflowMode::SentencePerLine => {
1298                                let num_sentences = split_into_sentences(&combined_content).len();
1299                                let num_lines = content_lines.len();
1300                                if num_lines == 1 {
1301                                    // Single line with multiple sentences
1302                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1303                                } else {
1304                                    // Multiple lines - could be split sentences or mixed
1305                                    format!(
1306                                        "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1307                                    )
1308                                }
1309                            }
1310                            ReflowMode::Normalize => {
1311                                let combined_length = self.calculate_effective_length(&full_line);
1312                                if combined_length > config.line_length.get() {
1313                                    format!(
1314                                        "Line length {} exceeds {} characters",
1315                                        combined_length,
1316                                        config.line_length.get()
1317                                    )
1318                                } else {
1319                                    "Multi-line content can be normalized".to_string()
1320                                }
1321                            }
1322                            ReflowMode::Default => {
1323                                let combined_length = self.calculate_effective_length(&full_line);
1324                                format!(
1325                                    "Line length {} exceeds {} characters",
1326                                    combined_length,
1327                                    config.line_length.get()
1328                                )
1329                            }
1330                        };
1331
1332                        warnings.push(LintWarning {
1333                            rule_name: Some(self.name().to_string()),
1334                            message,
1335                            line: list_start + 1,
1336                            column: 1,
1337                            end_line: end_line + 1,
1338                            end_column: lines[end_line].len() + 1,
1339                            severity: Severity::Warning,
1340                            fix: Some(crate::rule::Fix {
1341                                range: byte_range,
1342                                replacement,
1343                            }),
1344                        });
1345                    }
1346                }
1347                continue;
1348            }
1349
1350            // Found start of a paragraph - collect all lines in it
1351            let paragraph_start = i;
1352            let mut paragraph_lines = vec![lines[i]];
1353            i += 1;
1354
1355            while i < lines.len() {
1356                let next_line = lines[i];
1357                let next_line_num = i + 1;
1358                let next_trimmed = next_line.trim();
1359
1360                // Stop at paragraph boundaries
1361                if next_trimmed.is_empty()
1362                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1363                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1364                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1365                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1366                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1367                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1368                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1369                    || (next_line_num > 0
1370                        && next_line_num <= ctx.lines.len()
1371                        && ctx.lines[next_line_num - 1].blockquote.is_some())
1372                    || next_trimmed.starts_with('#')
1373                    || TableUtils::is_potential_table_row(next_line)
1374                    || is_list_item(next_trimmed)
1375                    || is_horizontal_rule(next_trimmed)
1376                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1377                    || is_template_directive_only(next_line)
1378                {
1379                    break;
1380                }
1381
1382                // Check if the previous line ends with a hard break (2+ spaces or backslash)
1383                if i > 0 && has_hard_break(lines[i - 1]) {
1384                    // Don't include lines after hard breaks in the same paragraph
1385                    break;
1386                }
1387
1388                paragraph_lines.push(next_line);
1389                i += 1;
1390            }
1391
1392            // Combine paragraph lines into a single string for processing
1393            // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1394            let paragraph_text = paragraph_lines.join(" ");
1395
1396            // Skip reflowing if this paragraph contains definition list items
1397            // Definition lists are multi-line structures that should not be joined
1398            let contains_definition_list = paragraph_lines
1399                .iter()
1400                .any(|line| crate::utils::is_definition_list_item(line));
1401
1402            if contains_definition_list {
1403                // Don't reflow definition lists - skip this paragraph
1404                i = paragraph_start + paragraph_lines.len();
1405                continue;
1406            }
1407
1408            // Check if this paragraph needs reflowing
1409            let needs_reflow = match config.reflow_mode {
1410                ReflowMode::Normalize => {
1411                    // In normalize mode, reflow multi-line paragraphs
1412                    paragraph_lines.len() > 1
1413                }
1414                ReflowMode::SentencePerLine => {
1415                    // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1416                    // Note: we check the joined text because sentences can span multiple lines
1417                    let sentences = split_into_sentences(&paragraph_text);
1418
1419                    // Always reflow if multiple sentences on one line
1420                    if sentences.len() > 1 {
1421                        true
1422                    } else if paragraph_lines.len() > 1 {
1423                        // For single-sentence paragraphs spanning multiple lines:
1424                        // Reflow if they COULD fit on one line (respecting line-length constraint)
1425                        if config.line_length.is_unlimited() {
1426                            // No line-length constraint - always join single sentences
1427                            true
1428                        } else {
1429                            // Only join if it fits within line-length
1430                            let effective_length = self.calculate_effective_length(&paragraph_text);
1431                            effective_length <= config.line_length.get()
1432                        }
1433                    } else {
1434                        false
1435                    }
1436                }
1437                ReflowMode::Default => {
1438                    // In default mode, only reflow if lines exceed limit
1439                    paragraph_lines
1440                        .iter()
1441                        .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1442                }
1443            };
1444
1445            if needs_reflow {
1446                // Calculate byte range for this paragraph
1447                // Use whole_line_range for each line and combine
1448                let start_range = line_index.whole_line_range(paragraph_start + 1);
1449                let end_line = paragraph_start + paragraph_lines.len() - 1;
1450
1451                // For the last line, we want to preserve any trailing newline
1452                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1453                    // Last line without trailing newline - use line_text_range
1454                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1455                } else {
1456                    // Not the last line or has trailing newline - use whole_line_range
1457                    line_index.whole_line_range(end_line + 1)
1458                };
1459
1460                let byte_range = start_range.start..end_range.end;
1461
1462                // Check if the paragraph ends with a hard break and what type
1463                let hard_break_type = paragraph_lines.last().and_then(|line| {
1464                    let line = line.strip_suffix('\r').unwrap_or(line);
1465                    if line.ends_with('\\') {
1466                        Some("\\")
1467                    } else if line.ends_with("  ") {
1468                        Some("  ")
1469                    } else {
1470                        None
1471                    }
1472                });
1473
1474                // Reflow the paragraph
1475                // When line_length = 0 (no limit), use a very large value for reflow
1476                let reflow_line_length = if config.line_length.is_unlimited() {
1477                    usize::MAX
1478                } else {
1479                    config.line_length.get()
1480                };
1481                let reflow_options = crate::utils::text_reflow::ReflowOptions {
1482                    line_length: reflow_line_length,
1483                    break_on_sentences: true,
1484                    preserve_breaks: false,
1485                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1486                    abbreviations: config.abbreviations.clone(),
1487                };
1488                let mut reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
1489
1490                // If the original paragraph ended with a hard break, preserve it
1491                // Preserve the original hard break format (backslash or two spaces)
1492                if let Some(break_marker) = hard_break_type
1493                    && !reflowed.is_empty()
1494                {
1495                    let last_idx = reflowed.len() - 1;
1496                    if !has_hard_break(&reflowed[last_idx]) {
1497                        reflowed[last_idx].push_str(break_marker);
1498                    }
1499                }
1500
1501                let reflowed_text = reflowed.join("\n");
1502
1503                // Preserve trailing newline if the original paragraph had one
1504                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1505                    format!("{reflowed_text}\n")
1506                } else {
1507                    reflowed_text
1508                };
1509
1510                // Get the original text to compare
1511                let original_text = &ctx.content[byte_range.clone()];
1512
1513                // Only generate a warning if the replacement is different from the original
1514                if original_text != replacement {
1515                    // Create warning with actual fix
1516                    // In default mode, report the specific line that violates
1517                    // In normalize mode, report the whole paragraph
1518                    // In sentence-per-line mode, report the entire paragraph
1519                    let (warning_line, warning_end_line) = match config.reflow_mode {
1520                        ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1521                        ReflowMode::SentencePerLine => {
1522                            // Highlight the entire paragraph that needs reformatting
1523                            (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1524                        }
1525                        ReflowMode::Default => {
1526                            // Find the first line that exceeds the limit
1527                            let mut violating_line = paragraph_start;
1528                            for (idx, line) in paragraph_lines.iter().enumerate() {
1529                                if self.calculate_effective_length(line) > config.line_length.get() {
1530                                    violating_line = paragraph_start + idx;
1531                                    break;
1532                                }
1533                            }
1534                            (violating_line + 1, violating_line + 1)
1535                        }
1536                    };
1537
1538                    warnings.push(LintWarning {
1539                        rule_name: Some(self.name().to_string()),
1540                        message: match config.reflow_mode {
1541                            ReflowMode::Normalize => format!(
1542                                "Paragraph could be normalized to use line length of {} characters",
1543                                config.line_length.get()
1544                            ),
1545                            ReflowMode::SentencePerLine => {
1546                                let num_sentences = split_into_sentences(&paragraph_text).len();
1547                                if paragraph_lines.len() == 1 {
1548                                    // Single line with multiple sentences
1549                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
1550                                } else {
1551                                    let num_lines = paragraph_lines.len();
1552                                    // Multiple lines - could be split sentences or mixed
1553                                    format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1554                                }
1555                            },
1556                            ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1557                        },
1558                        line: warning_line,
1559                        column: 1,
1560                        end_line: warning_end_line,
1561                        end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1562                        severity: Severity::Warning,
1563                        fix: Some(crate::rule::Fix {
1564                            range: byte_range,
1565                            replacement,
1566                        }),
1567                    });
1568                }
1569            }
1570        }
1571
1572        warnings
1573    }
1574
1575    /// Calculate string length based on the configured length mode
1576    fn calculate_string_length(&self, s: &str) -> usize {
1577        match self.config.length_mode {
1578            LengthMode::Chars => s.chars().count(),
1579            LengthMode::Visual => s.width(),
1580            LengthMode::Bytes => s.len(),
1581        }
1582    }
1583
1584    /// Calculate effective line length excluding unbreakable URLs
1585    fn calculate_effective_length(&self, line: &str) -> usize {
1586        if self.config.strict {
1587            // In strict mode, count everything
1588            return self.calculate_string_length(line);
1589        }
1590
1591        // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1592        let bytes = line.as_bytes();
1593        if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1594            return self.calculate_string_length(line);
1595        }
1596
1597        // More precise check for URLs and links
1598        if !line.contains("http") && !line.contains('[') {
1599            return self.calculate_string_length(line);
1600        }
1601
1602        let mut effective_line = line.to_string();
1603
1604        // First handle markdown links to avoid double-counting URLs
1605        // Pattern: [text](very-long-url) -> [text](url)
1606        if line.contains('[') && line.contains("](") {
1607            for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1608                if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1609                    && url.as_str().len() > 15
1610                {
1611                    let replacement = format!("[{}](url)", text.as_str());
1612                    effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1613                }
1614            }
1615        }
1616
1617        // Then replace bare URLs with a placeholder of reasonable length
1618        // This allows lines with long URLs to pass if the rest of the content is reasonable
1619        if effective_line.contains("http") {
1620            for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1621                let url = url_match.as_str();
1622                // Skip if this URL is already part of a markdown link we handled
1623                if !effective_line.contains(&format!("({url})")) {
1624                    // Replace URL with placeholder that represents a "reasonable" URL length
1625                    // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1626                    let placeholder = "x".repeat(15.min(url.len()));
1627                    effective_line = effective_line.replacen(url, &placeholder, 1);
1628                }
1629            }
1630        }
1631
1632        self.calculate_string_length(&effective_line)
1633    }
1634}