rumdl_lib/rules/md013_line_length/
mod.rs

//! Rule MD013: Line length
//!
//! See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15    BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16    reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod helpers;
22pub mod md013_config;
23use crate::utils::is_template_directive_only;
24use helpers::{
25    extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_list_item,
26    is_standalone_link_or_image_line, split_into_segments, trim_preserving_hard_break,
27};
28pub use md013_config::MD013Config;
29use md013_config::{LengthMode, ReflowMode};
30
31#[cfg(test)]
32mod tests;
33use unicode_width::UnicodeWidthStr;
34
/// Lint rule MD013: reports lines whose length exceeds the configured limit.
///
/// The rule carries nothing but its [`MD013Config`]; build it with
/// [`MD013LineLength::new`] or [`MD013LineLength::from_config_struct`].
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Configuration in effect for this rule instance (inline document
    /// configuration may still override parts of it during `check`).
    pub(crate) config: MD013Config,
}
39
/// Blockquote paragraph line collected for reflow, with original line index for range computation.
struct CollectedBlockquoteLine {
    /// Zero-based index of the line within the document's raw lines.
    line_idx: usize,
    /// Line content and prefix information passed on to the blockquote reflow helpers.
    data: BlockquoteLineData,
}
45
46impl MD013LineLength {
47    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
48        Self {
49            config: MD013Config {
50                line_length: crate::types::LineLength::new(line_length),
51                code_blocks,
52                tables,
53                headings,
54                paragraphs: true, // Default to true for backwards compatibility
55                strict,
56                reflow: false,
57                reflow_mode: ReflowMode::default(),
58                length_mode: LengthMode::default(),
59                abbreviations: Vec::new(),
60            },
61        }
62    }
63
    /// Create the rule from an already assembled [`MD013Config`], used as-is.
    pub fn from_config_struct(config: MD013Config) -> Self {
        Self { config }
    }
67
68    /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
69    fn reflow_length_mode(&self) -> ReflowLengthMode {
70        match self.config.length_mode {
71            LengthMode::Chars => ReflowLengthMode::Chars,
72            LengthMode::Visual => ReflowLengthMode::Visual,
73            LengthMode::Bytes => ReflowLengthMode::Bytes,
74        }
75    }
76
77    fn should_ignore_line(
78        &self,
79        line: &str,
80        _lines: &[&str],
81        current_line: usize,
82        ctx: &crate::lint_context::LintContext,
83    ) -> bool {
84        if self.config.strict {
85            return false;
86        }
87
88        // Quick check for common patterns before expensive regex
89        let trimmed = line.trim();
90
91        // Only skip if the entire line is a URL (quick check first)
92        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
93            return true;
94        }
95
96        // Only skip if the entire line is an image reference (quick check first)
97        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
98            return true;
99        }
100
101        // Note: link reference definitions are handled as always-exempt (even in strict mode)
102        // in the main check loop, so they don't need to be checked here.
103
104        // Code blocks with long strings (only check if in code block)
105        if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
106            && !trimmed.is_empty()
107            && !line.contains(' ')
108            && !line.contains('\t')
109        {
110            return true;
111        }
112
113        false
114    }
115
116    /// Check if rule should skip based on provided config (used for inline config support)
117    fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
118        // Skip if content is empty
119        if ctx.content.is_empty() {
120            return true;
121        }
122
123        // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
124        if config.reflow
125            && (config.reflow_mode == ReflowMode::SentencePerLine
126                || config.reflow_mode == ReflowMode::SemanticLineBreaks
127                || config.reflow_mode == ReflowMode::Normalize)
128        {
129            return false;
130        }
131
132        // Quick check: if total content is shorter than line limit, definitely skip
133        if ctx.content.len() <= config.line_length.get() {
134            return true;
135        }
136
137        // Skip if no line exceeds the limit
138        !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
139    }
140}
141
142impl Rule for MD013LineLength {
    /// Rule identifier; `check` attaches it to every warning as `rule_name`.
    fn name(&self) -> &'static str {
        "MD013"
    }
146
    /// One-line human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Line length should not be excessive"
    }
150
151    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
152        // Use pre-parsed inline config from LintContext
153        let config_override = ctx.inline_config().get_rule_config("MD013");
154
155        // Apply configuration override if present
156        let effective_config = if let Some(json_config) = config_override {
157            if let Some(obj) = json_config.as_object() {
158                let mut config = self.config.clone();
159                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
160                    config.line_length = crate::types::LineLength::new(line_length as usize);
161                }
162                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
163                    config.code_blocks = code_blocks;
164                }
165                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
166                    config.tables = tables;
167                }
168                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
169                    config.headings = headings;
170                }
171                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
172                    config.strict = strict;
173                }
174                if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
175                    config.reflow = reflow;
176                }
177                if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
178                    config.reflow_mode = match reflow_mode {
179                        "default" => ReflowMode::Default,
180                        "normalize" => ReflowMode::Normalize,
181                        "sentence-per-line" => ReflowMode::SentencePerLine,
182                        "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
183                        _ => ReflowMode::default(),
184                    };
185                }
186                config
187            } else {
188                self.config.clone()
189            }
190        } else {
191            self.config.clone()
192        };
193
194        // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
195        // But don't skip if we're in reflow mode with Normalize or SentencePerLine
196        if self.should_skip_with_config(ctx, &effective_config)
197            && !(effective_config.reflow
198                && (effective_config.reflow_mode == ReflowMode::Normalize
199                    || effective_config.reflow_mode == ReflowMode::SentencePerLine
200                    || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
201        {
202            return Ok(Vec::new());
203        }
204
205        // Direct implementation without DocumentStructure
206        let mut warnings = Vec::new();
207
208        // Special handling: line_length = 0 means "no line length limit"
209        // Skip all line length checks, but still allow reflow if enabled
210        let skip_length_checks = effective_config.line_length.is_unlimited();
211
212        // Pre-filter lines that could be problematic to avoid processing all lines
213        let mut candidate_lines = Vec::new();
214        if !skip_length_checks {
215            for (line_idx, line_info) in ctx.lines.iter().enumerate() {
216                // Skip front matter - it should never be linted
217                if line_info.in_front_matter {
218                    continue;
219                }
220
221                // Quick length check first
222                if line_info.byte_len > effective_config.line_length.get() {
223                    candidate_lines.push(line_idx);
224                }
225            }
226        }
227
228        // If no candidate lines and not in normalize or sentence-per-line mode, early return
229        if candidate_lines.is_empty()
230            && !(effective_config.reflow
231                && (effective_config.reflow_mode == ReflowMode::Normalize
232                    || effective_config.reflow_mode == ReflowMode::SentencePerLine
233                    || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
234        {
235            return Ok(warnings);
236        }
237
238        let lines = ctx.raw_lines();
239
240        // Create a quick lookup set for heading lines
241        // We need this for both the heading skip check AND the paragraphs check
242        let heading_lines_set: std::collections::HashSet<usize> = ctx
243            .lines
244            .iter()
245            .enumerate()
246            .filter(|(_, line)| line.heading.is_some())
247            .map(|(idx, _)| idx + 1)
248            .collect();
249
250        // Use pre-computed table blocks from context
251        // We need this for both the table skip check AND the paragraphs check
252        let table_blocks = &ctx.table_blocks;
253        let mut table_lines_set = std::collections::HashSet::new();
254        for table in table_blocks {
255            table_lines_set.insert(table.header_line + 1);
256            table_lines_set.insert(table.delimiter_line + 1);
257            for &line in &table.content_lines {
258                table_lines_set.insert(line + 1);
259            }
260        }
261
262        // Process candidate lines for line length checks
263        for &line_idx in &candidate_lines {
264            let line_number = line_idx + 1;
265            let line = lines[line_idx];
266
267            // Calculate actual line length (used in warning messages)
268            let effective_length = self.calculate_effective_length(line);
269
270            // Use single line length limit for all content
271            let line_limit = effective_config.line_length.get();
272
273            // In non-strict mode, forgive the trailing non-whitespace run.
274            // If the line only exceeds the limit because of a long token at the end
275            // (URL, link chain, identifier), it passes. This matches markdownlint's
276            // behavior: line.replace(/\S*$/u, "#")
277            let check_length = if effective_config.strict {
278                effective_length
279            } else {
280                match line.rfind(char::is_whitespace) {
281                    Some(pos) => {
282                        let ws_char = line[pos..].chars().next().unwrap();
283                        let prefix_end = pos + ws_char.len_utf8();
284                        self.calculate_string_length(&line[..prefix_end]) + 1
285                    }
286                    None => 1, // No whitespace — entire line is a single token
287                }
288            };
289
290            // Skip lines where the check length is within the limit
291            if check_length <= line_limit {
292                continue;
293            }
294
295            // Semantic link understanding: suppress when excess comes entirely from inline URLs
296            if !effective_config.strict {
297                let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
298                if text_only_length <= line_limit {
299                    continue;
300                }
301            }
302
303            // Skip mkdocstrings blocks (already handled by LintContext)
304            if ctx.lines[line_idx].in_mkdocstrings {
305                continue;
306            }
307
308            // Link reference definitions are always exempt, even in strict mode.
309            // There's no way to shorten them without breaking the URL.
310            // Also check after stripping list markers, since list items may
311            // contain link ref defs as their content.
312            {
313                let trimmed = line.trim();
314                if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
315                    continue;
316                }
317                if is_list_item(trimmed) {
318                    let (_, content) = extract_list_marker_and_content(trimmed);
319                    let content_trimmed = content.trim();
320                    if content_trimmed.starts_with('[')
321                        && content_trimmed.contains("]:")
322                        && LINK_REF_PATTERN.is_match(content_trimmed)
323                    {
324                        continue;
325                    }
326                }
327            }
328
329            // Skip various block types efficiently
330            if !effective_config.strict {
331                // Lines whose only content is a link/image are exempt.
332                // After stripping list markers, blockquote markers, and emphasis,
333                // if only a link or image remains, there is no way to shorten it.
334                if is_standalone_link_or_image_line(line) {
335                    continue;
336                }
337
338                // Skip setext heading underlines
339                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
340                    continue;
341                }
342
343                // Skip block elements according to config flags
344                // The flags mean: true = check these elements, false = skip these elements
345                // So we skip when the flag is FALSE and the line is in that element type
346                if (!effective_config.headings && heading_lines_set.contains(&line_number))
347                    || (!effective_config.code_blocks
348                        && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
349                    || (!effective_config.tables && table_lines_set.contains(&line_number))
350                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
351                    || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
352                    || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
353                    || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
354                    || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
355                {
356                    continue;
357                }
358
359                // Check if this is a paragraph/regular text line
360                // If paragraphs = false, skip lines that are NOT in special blocks
361                if !effective_config.paragraphs {
362                    let is_special_block = heading_lines_set.contains(&line_number)
363                        || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
364                        || table_lines_set.contains(&line_number)
365                        || ctx.lines[line_number - 1].blockquote.is_some()
366                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
367                        || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
368                        || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
369                        || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
370                        || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
371                        || ctx
372                            .line_info(line_number)
373                            .is_some_and(|info| info.in_mkdocs_container());
374
375                    // Skip regular paragraph text when paragraphs = false
376                    if !is_special_block {
377                        continue;
378                    }
379                }
380
381                // Skip lines that are only a URL, image ref, or link ref
382                if self.should_ignore_line(line, lines, line_idx, ctx) {
383                    continue;
384                }
385            }
386
387            // In sentence-per-line mode, check if this is a single long sentence
388            // If so, emit a warning without a fix (user must manually rephrase)
389            if effective_config.reflow_mode == ReflowMode::SentencePerLine {
390                let sentences = split_into_sentences(line.trim());
391                if sentences.len() == 1 {
392                    // Single sentence that's too long - warn but don't auto-fix
393                    let message = format!("Line length {effective_length} exceeds {line_limit} characters");
394
395                    let (start_line, start_col, end_line, end_col) =
396                        calculate_excess_range(line_number, line, line_limit);
397
398                    warnings.push(LintWarning {
399                        rule_name: Some(self.name().to_string()),
400                        message,
401                        line: start_line,
402                        column: start_col,
403                        end_line,
404                        end_column: end_col,
405                        severity: Severity::Warning,
406                        fix: None, // No auto-fix for long single sentences
407                    });
408                    continue;
409                }
410                // Multiple sentences will be handled by paragraph-based reflow
411                continue;
412            }
413
414            // In semantic-line-breaks mode, skip per-line checks —
415            // all reflow is handled at the paragraph level with cascading splits
416            if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
417                continue;
418            }
419
420            // Don't provide fix for individual lines when reflow is enabled
421            // Paragraph-based fixes will be handled separately
422            let fix = None;
423
424            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
425
426            // Calculate precise character range for the excess portion
427            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
428
429            warnings.push(LintWarning {
430                rule_name: Some(self.name().to_string()),
431                message,
432                line: start_line,
433                column: start_col,
434                end_line,
435                end_column: end_col,
436                severity: Severity::Warning,
437                fix,
438            });
439        }
440
441        // If reflow is enabled, generate paragraph-based fixes
442        if effective_config.reflow {
443            let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
444            // Merge paragraph warnings with line warnings, removing duplicates
445            for pw in paragraph_warnings {
446                // Remove any line warnings that overlap with this paragraph
447                warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
448                warnings.push(pw);
449            }
450        }
451
452        Ok(warnings)
453    }
454
455    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
456        // For CLI usage, apply fixes from warnings
457        // LSP will use the warning-based fixes directly
458        let warnings = self.check(ctx)?;
459
460        // If there are no fixes, return content unchanged
461        if !warnings.iter().any(|w| w.fix.is_some()) {
462            return Ok(ctx.content.to_string());
463        }
464
465        // Apply warning-based fixes
466        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
467            .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
468    }
469
    /// Expose the rule as `&dyn Any` (presumably so callers holding a `dyn Rule` can
    /// downcast to the concrete type — confirm against the call sites).
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
473
    /// Category this rule is filed under: whitespace.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
477
    /// Quick pre-check using the rule's own configuration (inline overrides are not
    /// considered here; `check` re-evaluates with the effective configuration).
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        self.should_skip_with_config(ctx, &self.config)
    }
481
482    fn default_config_section(&self) -> Option<(String, toml::Value)> {
483        let default_config = MD013Config::default();
484        let json_value = serde_json::to_value(&default_config).ok()?;
485        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
486
487        if let toml::Value::Table(table) = toml_value {
488            if !table.is_empty() {
489                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
490            } else {
491                None
492            }
493        } else {
494            None
495        }
496    }
497
498    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
499        let mut aliases = std::collections::HashMap::new();
500        aliases.insert("enable_reflow".to_string(), "reflow".to_string());
501        Some(aliases)
502    }
503
    /// Build the boxed rule from the project configuration.
    ///
    /// Loads the rule-specific `MD013Config` and, when its line length equals 80,
    /// substitutes the global `line_length` setting.
    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
        // Use global line_length if rule-specific config still has default value
        // NOTE(review): a rule-level line length explicitly set to 80 is indistinguishable
        // from the default here and is overridden by the global value too — confirm intended.
        if rule_config.line_length.get() == 80 {
            rule_config.line_length = config.global.line_length;
        }
        Box::new(Self::from_config_struct(rule_config))
    }
515}
516
517impl MD013LineLength {
518    fn is_blockquote_content_boundary(
519        &self,
520        content: &str,
521        line_num: usize,
522        ctx: &crate::lint_context::LintContext,
523    ) -> bool {
524        let trimmed = content.trim();
525
526        trimmed.is_empty()
527            || ctx.line_info(line_num).is_some_and(|info| {
528                info.in_code_block
529                    || info.in_front_matter
530                    || info.in_html_block
531                    || info.in_html_comment
532                    || info.in_esm_block
533                    || info.in_jsx_expression
534                    || info.in_mdx_comment
535                    || info.in_mkdocstrings
536                    || info.in_mkdocs_container()
537                    || info.is_div_marker
538            })
539            || trimmed.starts_with('#')
540            || trimmed.starts_with("```")
541            || trimmed.starts_with("~~~")
542            || trimmed.starts_with('>')
543            || TableUtils::is_potential_table_row(content)
544            || is_list_item(trimmed)
545            || is_horizontal_rule(trimmed)
546            || (trimmed.starts_with('[') && content.contains("]:"))
547            || is_template_directive_only(content)
548            || is_standalone_attr_list(content)
549            || is_snippet_block_delimiter(content)
550            || is_github_alert_marker(trimmed)
551    }
552
    /// Collect the blockquote paragraph starting at `start_idx` and, if it needs
    /// reflowing under `config`, build a warning carrying the replacement text.
    ///
    /// * `lines` - raw document lines; `start_idx` is a zero-based index into them.
    /// * `line_index` - used to turn line numbers into byte ranges of `ctx.content`.
    /// * `line_ending` - line terminator used to join the reflowed lines.
    ///
    /// Returns the optional warning together with the zero-based index of the next
    /// line the caller should look at: `start_idx + 1` when the start line is not a
    /// blockquote line or nothing was collected, otherwise the index at which
    /// collection stopped.
    fn generate_blockquote_paragraph_fix(
        &self,
        ctx: &crate::lint_context::LintContext,
        config: &MD013Config,
        lines: &[&str],
        line_index: &LineIndex,
        start_idx: usize,
        line_ending: &str,
    ) -> (Option<LintWarning>, usize) {
        // Only lines carrying blockquote information can start a blockquote paragraph.
        let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
            return (None, start_idx + 1);
        };
        // Every explicit blockquote line of the paragraph must share this nesting level.
        let target_level = start_bq.nesting_level;

        let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
        let mut i = start_idx;

        // Gather consecutive lines that belong to the same paragraph.
        while i < lines.len() {
            // Stop once the previously collected line reports a hard break.
            if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
                break;
            }

            // One-based line number; also guards the `ctx.lines[i]` access below.
            let line_num = i + 1;
            if line_num > ctx.lines.len() {
                break;
            }

            // A blank line ends the paragraph.
            if lines[i].trim().is_empty() {
                break;
            }

            let line_bq = ctx.lines[i].blockquote.as_deref();
            if let Some(bq) = line_bq {
                // Explicit blockquote line: must match the nesting level and not start another construct.
                if bq.nesting_level != target_level {
                    break;
                }

                if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
                    break;
                }

                collected.push(CollectedBlockquoteLine {
                    line_idx: i,
                    data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
                });
                i += 1;
                continue;
            }

            // No blockquote information: treat the line as a lazy continuation of the
            // paragraph unless its content is a boundary.
            let lazy_content = lines[i].trim_start();
            if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
                break;
            }

            collected.push(CollectedBlockquoteLine {
                line_idx: i,
                data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
            });
            i += 1;
        }

        if collected.is_empty() {
            return (None, start_idx + 1);
        }

        // From here on the caller resumes at the line where collection stopped.
        let next_idx = i;
        let paragraph_start = collected[0].line_idx;
        let end_line = collected[collected.len() - 1].line_idx;
        let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
        // Paragraph content joined with single spaces, used for sentence counting.
        let paragraph_text = line_data
            .iter()
            .map(|d| d.content.as_str())
            .collect::<Vec<_>>()
            .join(" ");

        // Paragraphs containing definition list items are left untouched.
        let contains_definition_list = line_data
            .iter()
            .any(|d| crate::utils::is_definition_list_item(&d.content));
        if contains_definition_list {
            return (None, next_idx);
        }

        // Paragraphs containing snippet block delimiters are left untouched.
        let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
        if contains_snippets {
            return (None, next_idx);
        }

        // Decide per mode whether the paragraph is a candidate for reflow:
        // - Normalize: any multi-line paragraph
        // - SentencePerLine: more than one sentence or more than one line
        // - SemanticLineBreaks: as SentencePerLine, or any line over the limit
        // - Default: only when some line is over the limit
        let needs_reflow = match config.reflow_mode {
            ReflowMode::Normalize => line_data.len() > 1,
            ReflowMode::SentencePerLine => {
                let sentences = split_into_sentences(&paragraph_text);
                sentences.len() > 1 || line_data.len() > 1
            }
            ReflowMode::SemanticLineBreaks => {
                let sentences = split_into_sentences(&paragraph_text);
                sentences.len() > 1
                    || line_data.len() > 1
                    || collected
                        .iter()
                        .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
            }
            ReflowMode::Default => collected
                .iter()
                .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
        };

        if !needs_reflow {
            return (None, next_idx);
        }

        // Prefix and continuation style for the rewritten lines are derived from the
        // collected lines, with the start line's prefix as the fallback.
        let fallback_prefix = start_bq.prefix.clone();
        let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
        let continuation_style = blockquote_continuation_style(&line_data);

        // The blockquote prefix consumes part of the line budget; keep at least one
        // column for content. An unlimited line length stays unlimited.
        let reflow_line_length = if config.line_length.is_unlimited() {
            usize::MAX
        } else {
            config
                .line_length
                .get()
                .saturating_sub(self.calculate_string_length(&explicit_prefix))
                .max(1)
        };

        let reflow_options = crate::utils::text_reflow::ReflowOptions {
            line_length: reflow_line_length,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
            semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
            abbreviations: config.abbreviations_for_reflow(),
            length_mode: self.reflow_length_mode(),
        };

        let reflowed_with_style =
            reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);

        if reflowed_with_style.is_empty() {
            return (None, next_idx);
        }

        let reflowed_text = reflowed_with_style.join(line_ending);

        // Byte range covering the whole paragraph. When the paragraph ends on the last
        // line of a document without a trailing newline, the end of the range is taken
        // from the line's text rather than from the whole-line range.
        let start_range = line_index.whole_line_range(paragraph_start + 1);
        let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
            line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
        } else {
            line_index.whole_line_range(end_line + 1)
        };
        let byte_range = start_range.start..end_range.end;

        // Append a line ending unless the paragraph ends the document without one.
        let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
            format!("{reflowed_text}{line_ending}")
        } else {
            reflowed_text
        };

        // Already in the desired shape: nothing to report.
        let original_text = &ctx.content[byte_range.clone()];
        if original_text == replacement {
            return (None, next_idx);
        }

        // One-based line span reported by the warning: the whole paragraph for the
        // paragraph-level modes, or the first over-long line in default mode (falling
        // back to the paragraph's first line).
        let (warning_line, warning_end_line) = match config.reflow_mode {
            ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
            ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
            ReflowMode::Default => {
                let violating_line = collected
                    .iter()
                    .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
                    .map(|line| line.line_idx + 1)
                    .unwrap_or(paragraph_start + 1);
                (violating_line, violating_line)
            }
        };

        let warning = LintWarning {
            rule_name: Some(self.name().to_string()),
            message: match config.reflow_mode {
                ReflowMode::Normalize => format!(
                    "Paragraph could be normalized to use line length of {} characters",
                    config.line_length.get()
                ),
                ReflowMode::SentencePerLine => {
                    let num_sentences = split_into_sentences(&paragraph_text).len();
                    if line_data.len() == 1 {
                        format!("Line contains {num_sentences} sentences (one sentence per line required)")
                    } else {
                        let num_lines = line_data.len();
                        format!(
                            "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
                        )
                    }
                }
                ReflowMode::SemanticLineBreaks => {
                    let num_sentences = split_into_sentences(&paragraph_text).len();
                    format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
                }
                ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
            },
            line: warning_line,
            column: 1,
            end_line: warning_end_line,
            // End column is derived from the byte length of the last reported line.
            end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
            severity: Severity::Warning,
            fix: Some(crate::rule::Fix {
                range: byte_range,
                replacement,
            }),
        };

        (Some(warning), next_idx)
    }
765
766    /// Generate paragraph-based fixes
767    fn generate_paragraph_fixes(
768        &self,
769        ctx: &crate::lint_context::LintContext,
770        config: &MD013Config,
771        lines: &[&str],
772    ) -> Vec<LintWarning> {
773        let mut warnings = Vec::new();
774        let line_index = LineIndex::new(ctx.content);
775
776        // Detect the content's line ending style to preserve it in replacements.
777        // The LSP receives content from editors which may use CRLF (Windows).
778        // Replacements must match the original line endings to avoid false positives.
779        let line_ending = crate::utils::line_ending::detect_line_ending(ctx.content);
780
781        let mut i = 0;
782        while i < lines.len() {
783            let line_num = i + 1;
784
785            // Handle blockquote paragraphs with style-preserving reflow.
786            if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
787                let (warning, next_idx) =
788                    self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i, line_ending);
789                if let Some(warning) = warning {
790                    warnings.push(warning);
791                }
792                i = next_idx;
793                continue;
794            }
795
796            // Skip special structures (but NOT MkDocs containers - those get special handling)
797            let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
798                info.in_code_block
799                    || info.in_front_matter
800                    || info.in_html_block
801                    || info.in_html_comment
802                    || info.in_esm_block
803                    || info.in_jsx_expression
804                    || info.in_mdx_comment
805                    || info.in_mkdocstrings
806            });
807
808            if should_skip_due_to_line_info
809                || lines[i].trim().starts_with('#')
810                || TableUtils::is_potential_table_row(lines[i])
811                || lines[i].trim().is_empty()
812                || is_horizontal_rule(lines[i].trim())
813                || is_template_directive_only(lines[i])
814                || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
815                || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
816            {
817                i += 1;
818                continue;
819            }
820
821            // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
822            if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
823                // Skip admonition/tab marker lines — only reflow their indented content
824                let current_line = lines[i];
825                if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
826                    i += 1;
827                    continue;
828                }
829
830                let container_start = i;
831
832                // Detect the actual indent level from the first content line
833                // (supports nested admonitions with 8+ spaces)
834                let first_line = lines[i];
835                let base_indent_len = first_line.len() - first_line.trim_start().len();
836                let base_indent: String = " ".repeat(base_indent_len);
837
838                // Collect consecutive MkDocs container paragraph lines
839                let mut container_lines: Vec<&str> = Vec::new();
840                while i < lines.len() {
841                    let current_line_num = i + 1;
842                    let line_info = ctx.line_info(current_line_num);
843
844                    // Stop if we leave the MkDocs container
845                    if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
846                        break;
847                    }
848
849                    let line = lines[i];
850
851                    // Stop at paragraph boundaries within the container
852                    if line.trim().is_empty() {
853                        break;
854                    }
855
856                    // Skip list items, code blocks, headings within containers
857                    if is_list_item(line.trim())
858                        || line.trim().starts_with("```")
859                        || line.trim().starts_with("~~~")
860                        || line.trim().starts_with('#')
861                    {
862                        break;
863                    }
864
865                    container_lines.push(line);
866                    i += 1;
867                }
868
869                if container_lines.is_empty() {
870                    // Must advance i to avoid infinite loop when we encounter
871                    // non-paragraph content (code block, list, heading, empty line)
872                    // at the start of an MkDocs container
873                    i += 1;
874                    continue;
875                }
876
877                // Strip the base indent from each line and join for reflow
878                let stripped_lines: Vec<&str> = container_lines
879                    .iter()
880                    .map(|line| {
881                        if line.starts_with(&base_indent) {
882                            &line[base_indent_len..]
883                        } else {
884                            line.trim_start()
885                        }
886                    })
887                    .collect();
888                let paragraph_text = stripped_lines.join(" ");
889
890                // Check if reflow is needed
891                let needs_reflow = match config.reflow_mode {
892                    ReflowMode::Normalize => container_lines.len() > 1,
893                    ReflowMode::SentencePerLine => {
894                        let sentences = split_into_sentences(&paragraph_text);
895                        sentences.len() > 1 || container_lines.len() > 1
896                    }
897                    ReflowMode::SemanticLineBreaks => {
898                        let sentences = split_into_sentences(&paragraph_text);
899                        sentences.len() > 1
900                            || container_lines.len() > 1
901                            || container_lines
902                                .iter()
903                                .any(|line| self.calculate_effective_length(line) > config.line_length.get())
904                    }
905                    ReflowMode::Default => container_lines
906                        .iter()
907                        .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
908                };
909
910                if !needs_reflow {
911                    continue;
912                }
913
914                // Calculate byte range for this container paragraph
915                let start_range = line_index.whole_line_range(container_start + 1);
916                let end_line = container_start + container_lines.len() - 1;
917                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
918                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
919                } else {
920                    line_index.whole_line_range(end_line + 1)
921                };
922                let byte_range = start_range.start..end_range.end;
923
924                // Reflow with adjusted line length (accounting for the 4-space indent)
925                let reflow_line_length = if config.line_length.is_unlimited() {
926                    usize::MAX
927                } else {
928                    config.line_length.get().saturating_sub(base_indent_len).max(1)
929                };
930                let reflow_options = crate::utils::text_reflow::ReflowOptions {
931                    line_length: reflow_line_length,
932                    break_on_sentences: true,
933                    preserve_breaks: false,
934                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
935                    semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
936                    abbreviations: config.abbreviations_for_reflow(),
937                    length_mode: self.reflow_length_mode(),
938                };
939                let reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
940
941                // Re-add the 4-space indent to each reflowed line
942                let reflowed_with_indent: Vec<String> =
943                    reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
944                let reflowed_text = reflowed_with_indent.join(line_ending);
945
946                // Preserve trailing newline
947                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
948                    format!("{reflowed_text}{line_ending}")
949                } else {
950                    reflowed_text
951                };
952
953                // Only generate a warning if the replacement is different
954                let original_text = &ctx.content[byte_range.clone()];
955                if original_text != replacement {
956                    warnings.push(LintWarning {
957                        rule_name: Some(self.name().to_string()),
958                        message: format!(
959                            "Line length {} exceeds {} characters (in MkDocs container)",
960                            container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
961                            config.line_length.get()
962                        ),
963                        line: container_start + 1,
964                        column: 1,
965                        end_line: end_line + 1,
966                        end_column: lines[end_line].len() + 1,
967                        severity: Severity::Warning,
968                        fix: Some(crate::rule::Fix {
969                            range: byte_range,
970                            replacement,
971                        }),
972                    });
973                }
974                continue;
975            }
976
977            // Helper function to detect semantic line markers
978            let is_semantic_line = |content: &str| -> bool {
979                let trimmed = content.trim_start();
980                let semantic_markers = [
981                    "NOTE:",
982                    "WARNING:",
983                    "IMPORTANT:",
984                    "CAUTION:",
985                    "TIP:",
986                    "DANGER:",
987                    "HINT:",
988                    "INFO:",
989                ];
990                semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
991            };
992
993            // Helper function to detect fence markers (opening or closing)
994            let is_fence_marker = |content: &str| -> bool {
995                let trimmed = content.trim_start();
996                trimmed.starts_with("```") || trimmed.starts_with("~~~")
997            };
998
999            // Check if this is a list item - handle it specially
1000            let trimmed = lines[i].trim();
1001            if is_list_item(trimmed) {
1002                // Collect the entire list item including continuation lines
1003                let list_start = i;
1004                let (marker, first_content) = extract_list_marker_and_content(lines[i]);
1005                let marker_len = marker.len();
1006
1007                // Track lines and their types (content, code block, fence, nested list)
1008                #[derive(Clone)]
1009                enum LineType {
1010                    Content(String),
1011                    CodeBlock(String, usize),      // content and original indent
1012                    NestedListItem(String, usize), // full line content and original indent
1013                    SemanticLine(String),          // Lines starting with NOTE:, WARNING:, etc that should stay separate
1014                    SnippetLine(String),           // MkDocs Snippets delimiters (-8<-) that must stay on their own line
1015                    DivMarker(String),             // Quarto/Pandoc div markers (::: opening or closing)
1016                    Empty,
1017                }
1018
1019                let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1020                i += 1;
1021
1022                // Collect continuation lines using ctx.lines for metadata
1023                while i < lines.len() {
1024                    let line_info = &ctx.lines[i];
1025
1026                    // Use pre-computed is_blank from ctx
1027                    if line_info.is_blank {
1028                        // Empty line - check if next line is indented (part of list item)
1029                        if i + 1 < lines.len() {
1030                            let next_info = &ctx.lines[i + 1];
1031
1032                            // Check if next line is indented enough to be continuation
1033                            if !next_info.is_blank && next_info.indent >= marker_len {
1034                                // This blank line is between paragraphs/blocks in the list item
1035                                list_item_lines.push(LineType::Empty);
1036                                i += 1;
1037                                continue;
1038                            }
1039                        }
1040                        // No indented line after blank, end of list item
1041                        break;
1042                    }
1043
1044                    // Use pre-computed indent from ctx
1045                    let indent = line_info.indent;
1046
1047                    // Valid continuation must be indented at least marker_len
1048                    if indent >= marker_len {
1049                        let trimmed = line_info.content(ctx.content).trim();
1050
1051                        // Use pre-computed in_code_block from ctx
1052                        if line_info.in_code_block {
1053                            list_item_lines.push(LineType::CodeBlock(
1054                                line_info.content(ctx.content)[indent..].to_string(),
1055                                indent,
1056                            ));
1057                            i += 1;
1058                            continue;
1059                        }
1060
1061                        // Check if this is a SIBLING list item (breaks parent)
1062                        // Nested lists are indented >= marker_len and are PART of the parent item
1063                        // Siblings are at indent < marker_len (at or before parent marker)
1064                        if is_list_item(trimmed) && indent < marker_len {
1065                            // This is a sibling item at same or higher level - end parent item
1066                            break;
1067                        }
1068
1069                        // Check if this is a NESTED list item marker
1070                        // Nested lists should be processed separately UNLESS they're part of a
1071                        // multi-paragraph list item (indicated by a blank line before them OR
1072                        // it's a continuation of an already-started nested list)
1073                        if is_list_item(trimmed) && indent >= marker_len {
1074                            // Check if there was a blank line before this (multi-paragraph context)
1075                            let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
1076
1077                            // Check if we've already seen nested list content (another nested item)
1078                            let has_nested_content = list_item_lines.iter().any(|line| {
1079                                matches!(line, LineType::Content(c) if is_list_item(c.trim()))
1080                                    || matches!(line, LineType::NestedListItem(_, _))
1081                            });
1082
1083                            if !has_blank_before && !has_nested_content {
1084                                // Single-paragraph context with no prior nested items: starts a new item
1085                                // End parent collection; nested list will be processed next
1086                                break;
1087                            }
1088                            // else: multi-paragraph context or continuation of nested list, keep collecting
1089                            // Mark this as a nested list item to preserve its structure
1090                            list_item_lines.push(LineType::NestedListItem(
1091                                line_info.content(ctx.content)[indent..].to_string(),
1092                                indent,
1093                            ));
1094                            i += 1;
1095                            continue;
1096                        }
1097
1098                        // Normal continuation: marker_len to marker_len+3
1099                        if indent <= marker_len + 3 {
1100                            // Extract content (remove indentation and trailing whitespace)
1101                            // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1102                            // See: https://github.com/rvben/rumdl/issues/76
1103                            let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1104
1105                            // Check if this is a div marker (::: opening or closing)
1106                            // These must be preserved on their own line, not merged into paragraphs
1107                            if line_info.is_div_marker {
1108                                list_item_lines.push(LineType::DivMarker(content));
1109                            }
1110                            // Check if this is a fence marker (opening or closing)
1111                            // These should be treated as code block lines, not paragraph content
1112                            else if is_fence_marker(&content) {
1113                                list_item_lines.push(LineType::CodeBlock(content, indent));
1114                            }
1115                            // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1116                            else if is_semantic_line(&content) {
1117                                list_item_lines.push(LineType::SemanticLine(content));
1118                            }
1119                            // Check if this is a snippet block delimiter (-8<- or --8<--)
1120                            // These must be preserved on their own lines for MkDocs Snippets extension
1121                            else if is_snippet_block_delimiter(&content) {
1122                                list_item_lines.push(LineType::SnippetLine(content));
1123                            } else {
1124                                list_item_lines.push(LineType::Content(content));
1125                            }
1126                            i += 1;
1127                        } else {
1128                            // indent >= marker_len + 4: indented code block
1129                            list_item_lines.push(LineType::CodeBlock(
1130                                line_info.content(ctx.content)[indent..].to_string(),
1131                                indent,
1132                            ));
1133                            i += 1;
1134                        }
1135                    } else {
1136                        // Not indented enough, end of list item
1137                        break;
1138                    }
1139                }
1140
1141                let indent_size = marker_len;
1142                let expected_indent = " ".repeat(indent_size);
1143
1144                // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
1145                #[derive(Clone)]
1146                enum Block {
1147                    Paragraph(Vec<String>),
1148                    Code {
1149                        lines: Vec<(String, usize)>, // (content, indent) pairs
1150                        has_preceding_blank: bool,   // Whether there was a blank line before this block
1151                    },
1152                    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
1153                    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
1154                    SnippetLine(String),  // MkDocs Snippets delimiter that stays on its own line without extra spacing
1155                    DivMarker(String),    // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
1156                    Html {
1157                        lines: Vec<String>,        // HTML content preserved exactly as-is
1158                        has_preceding_blank: bool, // Whether there was a blank line before this block
1159                    },
1160                }
1161
1162                // HTML tag detection helpers
1163                // Block-level HTML tags that should trigger HTML block detection
1164                const BLOCK_LEVEL_TAGS: &[&str] = &[
1165                    "div",
1166                    "details",
1167                    "summary",
1168                    "section",
1169                    "article",
1170                    "header",
1171                    "footer",
1172                    "nav",
1173                    "aside",
1174                    "main",
1175                    "table",
1176                    "thead",
1177                    "tbody",
1178                    "tfoot",
1179                    "tr",
1180                    "td",
1181                    "th",
1182                    "ul",
1183                    "ol",
1184                    "li",
1185                    "dl",
1186                    "dt",
1187                    "dd",
1188                    "pre",
1189                    "blockquote",
1190                    "figure",
1191                    "figcaption",
1192                    "form",
1193                    "fieldset",
1194                    "legend",
1195                    "hr",
1196                    "p",
1197                    "h1",
1198                    "h2",
1199                    "h3",
1200                    "h4",
1201                    "h5",
1202                    "h6",
1203                    "style",
1204                    "script",
1205                    "noscript",
1206                ];
1207
1208                fn is_block_html_opening_tag(line: &str) -> Option<String> {
1209                    let trimmed = line.trim();
1210
1211                    // Check for HTML comments
1212                    if trimmed.starts_with("<!--") {
1213                        return Some("!--".to_string());
1214                    }
1215
1216                    // Check for opening tags
1217                    if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
1218                        // Extract tag name from <tagname ...> or <tagname>
1219                        let after_bracket = &trimmed[1..];
1220                        if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
1221                            let tag_name = after_bracket[..end].to_lowercase();
1222
1223                            // Only treat as block if it's a known block-level tag
1224                            if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
1225                                return Some(tag_name);
1226                            }
1227                        }
1228                    }
1229                    None
1230                }
1231
1232                fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
1233                    let trimmed = line.trim();
1234
1235                    // Special handling for HTML comments
1236                    if tag_name == "!--" {
1237                        return trimmed.ends_with("-->");
1238                    }
1239
1240                    // Check for closing tags: </tagname> or </tagname ...>
1241                    trimmed.starts_with(&format!("</{tag_name}>"))
1242                        || trimmed.starts_with(&format!("</{tag_name}  "))
1243                        || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
1244                }
1245
1246                fn is_self_closing_tag(line: &str) -> bool {
1247                    let trimmed = line.trim();
1248                    trimmed.ends_with("/>")
1249                }
1250
1251                let mut blocks: Vec<Block> = Vec::new();
1252                let mut current_paragraph: Vec<String> = Vec::new();
1253                let mut current_code_block: Vec<(String, usize)> = Vec::new();
1254                let mut current_nested_list: Vec<(String, usize)> = Vec::new();
1255                let mut current_html_block: Vec<String> = Vec::new();
1256                let mut html_tag_stack: Vec<String> = Vec::new();
1257                let mut in_code = false;
1258                let mut in_nested_list = false;
1259                let mut in_html_block = false;
1260                let mut had_preceding_blank = false; // Track if we just saw an empty line
1261                let mut code_block_has_preceding_blank = false; // Track blank before current code block
1262                let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
1263
1264                for line in &list_item_lines {
1265                    match line {
1266                        LineType::Empty => {
1267                            if in_code {
1268                                current_code_block.push((String::new(), 0));
1269                            } else if in_nested_list {
1270                                current_nested_list.push((String::new(), 0));
1271                            } else if in_html_block {
1272                                // Allow blank lines inside HTML blocks
1273                                current_html_block.push(String::new());
1274                            } else if !current_paragraph.is_empty() {
1275                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1276                                current_paragraph.clear();
1277                            }
1278                            // Mark that we saw a blank line
1279                            had_preceding_blank = true;
1280                        }
1281                        LineType::Content(content) => {
1282                            // Check if we're currently in an HTML block
1283                            if in_html_block {
1284                                current_html_block.push(content.clone());
1285
1286                                // Check if this line closes any open HTML tags
1287                                if let Some(last_tag) = html_tag_stack.last() {
1288                                    if is_html_closing_tag(content, last_tag) {
1289                                        html_tag_stack.pop();
1290
1291                                        // If stack is empty, HTML block is complete
1292                                        if html_tag_stack.is_empty() {
1293                                            blocks.push(Block::Html {
1294                                                lines: current_html_block.clone(),
1295                                                has_preceding_blank: html_block_has_preceding_blank,
1296                                            });
1297                                            current_html_block.clear();
1298                                            in_html_block = false;
1299                                        }
1300                                    } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1301                                        // Nested opening tag within HTML block
1302                                        if !is_self_closing_tag(content) {
1303                                            html_tag_stack.push(new_tag);
1304                                        }
1305                                    }
1306                                }
1307                                had_preceding_blank = false;
1308                            } else {
1309                                // Not in HTML block - check if this line starts one
1310                                if let Some(tag_name) = is_block_html_opening_tag(content) {
1311                                    // Flush current paragraph before starting HTML block
1312                                    if in_code {
1313                                        blocks.push(Block::Code {
1314                                            lines: current_code_block.clone(),
1315                                            has_preceding_blank: code_block_has_preceding_blank,
1316                                        });
1317                                        current_code_block.clear();
1318                                        in_code = false;
1319                                    } else if in_nested_list {
1320                                        blocks.push(Block::NestedList(current_nested_list.clone()));
1321                                        current_nested_list.clear();
1322                                        in_nested_list = false;
1323                                    } else if !current_paragraph.is_empty() {
1324                                        blocks.push(Block::Paragraph(current_paragraph.clone()));
1325                                        current_paragraph.clear();
1326                                    }
1327
1328                                    // Start new HTML block
1329                                    in_html_block = true;
1330                                    html_block_has_preceding_blank = had_preceding_blank;
1331                                    current_html_block.push(content.clone());
1332
1333                                    // Check if it's self-closing or needs a closing tag
1334                                    if is_self_closing_tag(content) {
1335                                        // Self-closing tag - complete the HTML block immediately
1336                                        blocks.push(Block::Html {
1337                                            lines: current_html_block.clone(),
1338                                            has_preceding_blank: html_block_has_preceding_blank,
1339                                        });
1340                                        current_html_block.clear();
1341                                        in_html_block = false;
1342                                    } else {
1343                                        // Regular opening tag - push to stack
1344                                        html_tag_stack.push(tag_name);
1345                                    }
1346                                } else {
1347                                    // Regular content line - add to paragraph
1348                                    if in_code {
1349                                        // Switching from code to content
1350                                        blocks.push(Block::Code {
1351                                            lines: current_code_block.clone(),
1352                                            has_preceding_blank: code_block_has_preceding_blank,
1353                                        });
1354                                        current_code_block.clear();
1355                                        in_code = false;
1356                                    } else if in_nested_list {
1357                                        // Switching from nested list to content
1358                                        blocks.push(Block::NestedList(current_nested_list.clone()));
1359                                        current_nested_list.clear();
1360                                        in_nested_list = false;
1361                                    }
1362                                    current_paragraph.push(content.clone());
1363                                }
1364                                had_preceding_blank = false; // Reset after content
1365                            }
1366                        }
1367                        LineType::CodeBlock(content, indent) => {
1368                            if in_nested_list {
1369                                // Switching from nested list to code
1370                                blocks.push(Block::NestedList(current_nested_list.clone()));
1371                                current_nested_list.clear();
1372                                in_nested_list = false;
1373                            } else if in_html_block {
1374                                // Switching from HTML block to code (shouldn't happen normally, but handle it)
1375                                blocks.push(Block::Html {
1376                                    lines: current_html_block.clone(),
1377                                    has_preceding_blank: html_block_has_preceding_blank,
1378                                });
1379                                current_html_block.clear();
1380                                html_tag_stack.clear();
1381                                in_html_block = false;
1382                            }
1383                            if !in_code {
1384                                // Switching from content to code
1385                                if !current_paragraph.is_empty() {
1386                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
1387                                    current_paragraph.clear();
1388                                }
1389                                in_code = true;
1390                                // Record whether there was a blank line before this code block
1391                                code_block_has_preceding_blank = had_preceding_blank;
1392                            }
1393                            current_code_block.push((content.clone(), *indent));
1394                            had_preceding_blank = false; // Reset after code
1395                        }
1396                        LineType::NestedListItem(content, indent) => {
1397                            if in_code {
1398                                // Switching from code to nested list
1399                                blocks.push(Block::Code {
1400                                    lines: current_code_block.clone(),
1401                                    has_preceding_blank: code_block_has_preceding_blank,
1402                                });
1403                                current_code_block.clear();
1404                                in_code = false;
1405                            } else if in_html_block {
1406                                // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1407                                blocks.push(Block::Html {
1408                                    lines: current_html_block.clone(),
1409                                    has_preceding_blank: html_block_has_preceding_blank,
1410                                });
1411                                current_html_block.clear();
1412                                html_tag_stack.clear();
1413                                in_html_block = false;
1414                            }
1415                            if !in_nested_list {
1416                                // Switching from content to nested list
1417                                if !current_paragraph.is_empty() {
1418                                    blocks.push(Block::Paragraph(current_paragraph.clone()));
1419                                    current_paragraph.clear();
1420                                }
1421                                in_nested_list = true;
1422                            }
1423                            current_nested_list.push((content.clone(), *indent));
1424                            had_preceding_blank = false; // Reset after nested list
1425                        }
1426                        LineType::SemanticLine(content) => {
1427                            // Semantic lines are standalone - flush any current block and add as separate block
1428                            if in_code {
1429                                blocks.push(Block::Code {
1430                                    lines: current_code_block.clone(),
1431                                    has_preceding_blank: code_block_has_preceding_blank,
1432                                });
1433                                current_code_block.clear();
1434                                in_code = false;
1435                            } else if in_nested_list {
1436                                blocks.push(Block::NestedList(current_nested_list.clone()));
1437                                current_nested_list.clear();
1438                                in_nested_list = false;
1439                            } else if in_html_block {
1440                                blocks.push(Block::Html {
1441                                    lines: current_html_block.clone(),
1442                                    has_preceding_blank: html_block_has_preceding_blank,
1443                                });
1444                                current_html_block.clear();
1445                                html_tag_stack.clear();
1446                                in_html_block = false;
1447                            } else if !current_paragraph.is_empty() {
1448                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1449                                current_paragraph.clear();
1450                            }
1451                            // Add semantic line as its own block
1452                            blocks.push(Block::SemanticLine(content.clone()));
1453                            had_preceding_blank = false; // Reset after semantic line
1454                        }
1455                        LineType::SnippetLine(content) => {
1456                            // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1457                            // Unlike semantic lines, snippet lines don't add extra blank lines around them
1458                            if in_code {
1459                                blocks.push(Block::Code {
1460                                    lines: current_code_block.clone(),
1461                                    has_preceding_blank: code_block_has_preceding_blank,
1462                                });
1463                                current_code_block.clear();
1464                                in_code = false;
1465                            } else if in_nested_list {
1466                                blocks.push(Block::NestedList(current_nested_list.clone()));
1467                                current_nested_list.clear();
1468                                in_nested_list = false;
1469                            } else if in_html_block {
1470                                blocks.push(Block::Html {
1471                                    lines: current_html_block.clone(),
1472                                    has_preceding_blank: html_block_has_preceding_blank,
1473                                });
1474                                current_html_block.clear();
1475                                html_tag_stack.clear();
1476                                in_html_block = false;
1477                            } else if !current_paragraph.is_empty() {
1478                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1479                                current_paragraph.clear();
1480                            }
1481                            // Add snippet line as its own block
1482                            blocks.push(Block::SnippetLine(content.clone()));
1483                            had_preceding_blank = false;
1484                        }
1485                        LineType::DivMarker(content) => {
1486                            // Div markers (::: opening or closing) are standalone structural delimiters
1487                            // Flush any current block and add as separate block
1488                            if in_code {
1489                                blocks.push(Block::Code {
1490                                    lines: current_code_block.clone(),
1491                                    has_preceding_blank: code_block_has_preceding_blank,
1492                                });
1493                                current_code_block.clear();
1494                                in_code = false;
1495                            } else if in_nested_list {
1496                                blocks.push(Block::NestedList(current_nested_list.clone()));
1497                                current_nested_list.clear();
1498                                in_nested_list = false;
1499                            } else if in_html_block {
1500                                blocks.push(Block::Html {
1501                                    lines: current_html_block.clone(),
1502                                    has_preceding_blank: html_block_has_preceding_blank,
1503                                });
1504                                current_html_block.clear();
1505                                html_tag_stack.clear();
1506                                in_html_block = false;
1507                            } else if !current_paragraph.is_empty() {
1508                                blocks.push(Block::Paragraph(current_paragraph.clone()));
1509                                current_paragraph.clear();
1510                            }
1511                            blocks.push(Block::DivMarker(content.clone()));
1512                            had_preceding_blank = false;
1513                        }
1514                    }
1515                }
1516
1517                // Push remaining block
1518                if in_code && !current_code_block.is_empty() {
1519                    blocks.push(Block::Code {
1520                        lines: current_code_block,
1521                        has_preceding_blank: code_block_has_preceding_blank,
1522                    });
1523                } else if in_nested_list && !current_nested_list.is_empty() {
1524                    blocks.push(Block::NestedList(current_nested_list));
1525                } else if in_html_block && !current_html_block.is_empty() {
1526                    // If we still have an unclosed HTML block, push it anyway
1527                    // (malformed HTML - missing closing tag)
1528                    blocks.push(Block::Html {
1529                        lines: current_html_block,
1530                        has_preceding_blank: html_block_has_preceding_blank,
1531                    });
1532                } else if !current_paragraph.is_empty() {
1533                    blocks.push(Block::Paragraph(current_paragraph));
1534                }
1535
1536                // Helper closure: decides whether a line (raw source or stripped content) is exempt
1537                // from line-length checks. Returns `true` for link reference definitions in every
1538                // mode, and for standalone link/image lines only when `config.strict` is off.
1539                // The link-reference test is repeated on the text after a list marker, since list item
1540                // continuation lines may contain link ref defs.
                    // `raw_line` is the only parameter; `config` is captured from the enclosing scope.
1541                let is_exempt_line = |raw_line: &str| -> bool {
1542                    let trimmed = raw_line.trim();
1543                    // Link reference definitions: always exempt. The `starts_with`/`contains` tests
                        // come first in the `&&` chain, so the regex only runs when both of them pass.
1544                    if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
1545                        return true;
1546                    }
1547                    // Same link-reference test, applied to the content that follows a list marker
1548                    if is_list_item(trimmed) {
1549                        let (_, content) = extract_list_marker_and_content(trimmed);
1550                        let content_trimmed = content.trim();
1551                        if content_trimmed.starts_with('[')
1552                            && content_trimmed.contains("]:")
1553                            && LINK_REF_PATTERN.is_match(content_trimmed)
1554                        {
1555                            return true;
1556                        }
1557                    }
1558                    // Standalone link/image lines: exempt when not strict (checked on the untrimmed `raw_line`)
1559                    if !config.strict && is_standalone_link_or_image_line(raw_line) {
1560                        return true;
1561                    }
                        // No exemption matched.
1562                    false
1563                };
1564
1565                // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1566                // Exclude link reference definitions and standalone link lines from content
1567                // so they don't pollute combined_content or trigger false reflow.
1568                let content_lines: Vec<String> = list_item_lines
1569                    .iter()
1570                    .filter_map(|line| {
1571                        if let LineType::Content(s) = line {
1572                            if is_exempt_line(s) {
1573                                return None;
1574                            }
1575                            Some(s.clone())
1576                        } else {
1577                            None
1578                        }
1579                    })
1580                    .collect();
1581
1582                // Check if we need to reflow this list item
1583                // We check the combined content to see if it exceeds length limits
1584                let combined_content = content_lines.join(" ").trim().to_string();
1585
1586                // Helper closure: in normalize mode, decides whether this list item should be reflowed
                    // even though no paragraph exceeds the limit. Takes no arguments and returns a bool;
                    // reads `blocks`, `content_lines` and `is_exempt_line` from the enclosing scope.
1587                let should_normalize = || {
1588                    // Don't normalize if the list item only contains nested lists, code blocks, semantic
1589                    // lines, snippet lines, or div markers. DO normalize if it has plain text content that spans multiple lines
1590                    let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1591                    let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1592                    let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1593                    let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1594                    let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1595                    let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1596
1597                    // If we have structural blocks but no paragraphs, don't normalize
                        // (`Block::Html` is not one of the variants tested here.)
1598                    if (has_nested_lists
1599                        || has_code_blocks
1600                        || has_semantic_lines
1601                        || has_snippet_lines
1602                        || has_div_markers)
1603                        && !has_paragraphs
1604                    {
1605                        return false;
1606                    }
1607
1608                    // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1609                    if has_paragraphs {
1610                        // Count only paragraphs that contain at least one non-exempt line.
1611                        // Paragraphs consisting entirely of link ref defs or standalone links
1612                        // should not trigger normalization.
1613                        let paragraph_count = blocks
1614                            .iter()
1615                            .filter(|b| {
1616                                if let Block::Paragraph(para_lines) = b {
1617                                    !para_lines.iter().all(|line| is_exempt_line(line))
1618                                } else {
1619                                    false
1620                                }
1621                            })
1622                            .count();
1623                        if paragraph_count > 1 {
1624                            // Multiple non-exempt paragraph blocks should be normalized
1625                            return true;
1626                        }
1627
1628                        // At most one non-exempt paragraph: normalize if there is more than one
                            // entry in `content_lines` (the non-exempt content lines collected earlier)
1629                        if content_lines.len() > 1 {
1630                            return true;
1631                        }
1632                    }
1633
                        // Nothing above asked for normalization.
1634                    false
1635                };
1636
1637                let needs_reflow = match config.reflow_mode {
1638                    ReflowMode::Normalize => {
1639                        // Only reflow if:
1640                        // 1. Any non-exempt paragraph, when joined, exceeds the limit, OR
1641                        // 2. The list item should be normalized (has multi-line plain text)
1642                        let any_paragraph_exceeds = blocks.iter().any(|block| {
1643                            if let Block::Paragraph(para_lines) = block {
1644                                if para_lines.iter().all(|line| is_exempt_line(line)) {
1645                                    return false;
1646                                }
1647                                let joined = para_lines.join(" ");
1648                                let with_marker = format!("{}{}", " ".repeat(marker_len), joined.trim());
1649                                self.calculate_effective_length(&with_marker) > config.line_length.get()
1650                            } else {
1651                                false
1652                            }
1653                        });
1654                        if any_paragraph_exceeds {
1655                            true
1656                        } else {
1657                            should_normalize()
1658                        }
1659                    }
1660                    ReflowMode::SentencePerLine => {
1661                        // Check if list item has multiple sentences
1662                        let sentences = split_into_sentences(&combined_content);
1663                        sentences.len() > 1
1664                    }
1665                    ReflowMode::SemanticLineBreaks => {
1666                        let sentences = split_into_sentences(&combined_content);
1667                        sentences.len() > 1
1668                            || (list_start..i).any(|line_idx| {
1669                                let line = lines[line_idx];
1670                                let trimmed = line.trim();
1671                                if trimmed.is_empty() || is_exempt_line(line) {
1672                                    return false;
1673                                }
1674                                self.calculate_effective_length(line) > config.line_length.get()
1675                            })
1676                    }
1677                    ReflowMode::Default => {
1678                        // In default mode, only reflow if any individual non-exempt line exceeds limit
1679                        (list_start..i).any(|line_idx| {
1680                            let line = lines[line_idx];
1681                            let trimmed = line.trim();
1682                            // Skip blank lines and exempt lines
1683                            if trimmed.is_empty() || is_exempt_line(line) {
1684                                return false;
1685                            }
1686                            self.calculate_effective_length(line) > config.line_length.get()
1687                        })
1688                    }
1689                };
1690
1691                if needs_reflow {
1692                    let start_range = line_index.whole_line_range(list_start + 1);
1693                    let end_line = i - 1;
1694                    let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1695                        line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1696                    } else {
1697                        line_index.whole_line_range(end_line + 1)
1698                    };
1699                    let byte_range = start_range.start..end_range.end;
1700
1701                    // Reflow each block (paragraphs only, preserve code blocks)
1702                    // When line_length = 0 (no limit), use a very large value for reflow
1703                    let reflow_line_length = if config.line_length.is_unlimited() {
1704                        usize::MAX
1705                    } else {
1706                        config.line_length.get().saturating_sub(indent_size).max(1)
1707                    };
1708                    let reflow_options = crate::utils::text_reflow::ReflowOptions {
1709                        line_length: reflow_line_length,
1710                        break_on_sentences: true,
1711                        preserve_breaks: false,
1712                        sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1713                        semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1714                        abbreviations: config.abbreviations_for_reflow(),
1715                        length_mode: self.reflow_length_mode(),
1716                    };
1717
1718                    let mut result: Vec<String> = Vec::new();
1719                    let mut is_first_block = true;
1720
1721                    for (block_idx, block) in blocks.iter().enumerate() {
1722                        match block {
1723                            Block::Paragraph(para_lines) => {
1724                                // If every line in this paragraph is exempt (link ref defs,
1725                                // standalone links), preserve the paragraph verbatim instead
1726                                // of reflowing it. Reflowing would corrupt link ref defs.
1727                                let all_exempt = para_lines.iter().all(|line| is_exempt_line(line));
1728
1729                                if all_exempt {
1730                                    for (idx, line) in para_lines.iter().enumerate() {
1731                                        if is_first_block && idx == 0 {
1732                                            result.push(format!("{marker}{line}"));
1733                                            is_first_block = false;
1734                                        } else {
1735                                            result.push(format!("{expected_indent}{line}"));
1736                                        }
1737                                    }
1738                                } else {
1739                                    // Split the paragraph into segments at hard break boundaries
1740                                    // Each segment can be reflowed independently
1741                                    let segments = split_into_segments(para_lines);
1742
1743                                    for (segment_idx, segment) in segments.iter().enumerate() {
1744                                        // Check if this segment ends with a hard break and what type
1745                                        let hard_break_type = segment.last().and_then(|line| {
1746                                            let line = line.strip_suffix('\r').unwrap_or(line);
1747                                            if line.ends_with('\\') {
1748                                                Some("\\")
1749                                            } else if line.ends_with("  ") {
1750                                                Some("  ")
1751                                            } else {
1752                                                None
1753                                            }
1754                                        });
1755
1756                                        // Join and reflow the segment (removing the hard break marker for processing)
1757                                        let segment_for_reflow: Vec<String> = segment
1758                                            .iter()
1759                                            .map(|line| {
1760                                                // Strip hard break marker (2 spaces or backslash) for reflow processing
1761                                                if line.ends_with('\\') {
1762                                                    line[..line.len() - 1].trim_end().to_string()
1763                                                } else if line.ends_with("  ") {
1764                                                    line[..line.len() - 2].trim_end().to_string()
1765                                                } else {
1766                                                    line.clone()
1767                                                }
1768                                            })
1769                                            .collect();
1770
1771                                        let segment_text = segment_for_reflow.join(" ").trim().to_string();
1772                                        if !segment_text.is_empty() {
1773                                            let reflowed =
1774                                                crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1775
1776                                            if is_first_block && segment_idx == 0 {
1777                                                // First segment of first block starts with marker
1778                                                result.push(format!("{marker}{}", reflowed[0]));
1779                                                for line in reflowed.iter().skip(1) {
1780                                                    result.push(format!("{expected_indent}{line}"));
1781                                                }
1782                                                is_first_block = false;
1783                                            } else {
1784                                                // Subsequent segments
1785                                                for line in reflowed {
1786                                                    result.push(format!("{expected_indent}{line}"));
1787                                                }
1788                                            }
1789
1790                                            // If this segment had a hard break, add it back to the last line
1791                                            // Preserve the original hard break format (backslash or two spaces)
1792                                            if let Some(break_marker) = hard_break_type
1793                                                && let Some(last_line) = result.last_mut()
1794                                            {
1795                                                last_line.push_str(break_marker);
1796                                            }
1797                                        }
1798                                    }
1799                                }
1800
1801                                // Add blank line after paragraph block if there's a next block.
1802                                // Check if next block is a code block that doesn't want a preceding blank.
1803                                // Also don't add blank lines before snippet lines (they should stay tight).
1804                                // Only add if not already ending with one (avoids double blanks).
1805                                if block_idx < blocks.len() - 1 {
1806                                    let next_block = &blocks[block_idx + 1];
1807                                    let should_add_blank = match next_block {
1808                                        Block::Code {
1809                                            has_preceding_blank, ..
1810                                        } => *has_preceding_blank,
1811                                        Block::SnippetLine(_) | Block::DivMarker(_) => false,
1812                                        _ => true, // For all other blocks, add blank line
1813                                    };
1814                                    if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1815                                    {
1816                                        result.push(String::new());
1817                                    }
1818                                }
1819                            }
1820                            Block::Code {
1821                                lines: code_lines,
1822                                has_preceding_blank: _,
1823                            } => {
1824                                // Preserve code blocks as-is with original indentation
1825                                // NOTE: Blank line before code block is handled by the previous block
1826                                // (see paragraph block's logic above)
1827
1828                                for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1829                                    if is_first_block && idx == 0 {
1830                                        // First line of first block gets marker
1831                                        result.push(format!(
1832                                            "{marker}{}",
1833                                            " ".repeat(orig_indent - marker_len) + content
1834                                        ));
1835                                        is_first_block = false;
1836                                    } else if content.is_empty() {
1837                                        result.push(String::new());
1838                                    } else {
1839                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1840                                    }
1841                                }
1842                            }
1843                            Block::NestedList(nested_items) => {
1844                                // Preserve nested list items as-is with original indentation.
1845                                // Only add blank before if not already ending with one (avoids
1846                                // double blanks when the preceding block already added one).
1847                                if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1848                                    result.push(String::new());
1849                                }
1850
1851                                for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1852                                    if is_first_block && idx == 0 {
1853                                        // First line of first block gets marker
1854                                        result.push(format!(
1855                                            "{marker}{}",
1856                                            " ".repeat(orig_indent - marker_len) + content
1857                                        ));
1858                                        is_first_block = false;
1859                                    } else if content.is_empty() {
1860                                        result.push(String::new());
1861                                    } else {
1862                                        result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1863                                    }
1864                                }
1865
1866                                // Add blank line after nested list if there's a next block.
1867                                // Only add if not already ending with one (avoids double blanks
1868                                // when the last nested item was already a blank line).
1869                                if block_idx < blocks.len() - 1 {
1870                                    let next_block = &blocks[block_idx + 1];
1871                                    let should_add_blank = match next_block {
1872                                        Block::Code {
1873                                            has_preceding_blank, ..
1874                                        } => *has_preceding_blank,
1875                                        Block::SnippetLine(_) | Block::DivMarker(_) => false,
1876                                        _ => true, // For all other blocks, add blank line
1877                                    };
1878                                    if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1879                                    {
1880                                        result.push(String::new());
1881                                    }
1882                                }
1883                            }
1884                            Block::SemanticLine(content) => {
1885                                // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
1886                                // Only add blank before if not already ending with one.
1887                                if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1888                                    result.push(String::new());
1889                                }
1890
1891                                if is_first_block {
1892                                    // First block starts with marker
1893                                    result.push(format!("{marker}{content}"));
1894                                    is_first_block = false;
1895                                } else {
1896                                    // Subsequent blocks use expected indent
1897                                    result.push(format!("{expected_indent}{content}"));
1898                                }
1899
1900                                // Add blank line after semantic line if there's a next block.
1901                                // Only add if not already ending with one.
1902                                if block_idx < blocks.len() - 1 {
1903                                    let next_block = &blocks[block_idx + 1];
1904                                    let should_add_blank = match next_block {
1905                                        Block::Code {
1906                                            has_preceding_blank, ..
1907                                        } => *has_preceding_blank,
1908                                        Block::SnippetLine(_) | Block::DivMarker(_) => false,
1909                                        _ => true, // For all other blocks, add blank line
1910                                    };
1911                                    if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1912                                    {
1913                                        result.push(String::new());
1914                                    }
1915                                }
1916                            }
1917                            Block::SnippetLine(content) => {
1918                                // Preserve snippet delimiters (-8<-) as-is on their own line
1919                                // Unlike semantic lines, snippet lines don't add extra blank lines
1920                                if is_first_block {
1921                                    // First block starts with marker
1922                                    result.push(format!("{marker}{content}"));
1923                                    is_first_block = false;
1924                                } else {
1925                                    // Subsequent blocks use expected indent
1926                                    result.push(format!("{expected_indent}{content}"));
1927                                }
1928                                // No blank lines added before or after snippet delimiters
1929                            }
1930                            Block::DivMarker(content) => {
1931                                // Preserve div markers (::: opening or closing) as-is on their own line
1932                                if is_first_block {
1933                                    result.push(format!("{marker}{content}"));
1934                                    is_first_block = false;
1935                                } else {
1936                                    result.push(format!("{expected_indent}{content}"));
1937                                }
1938                            }
1939                            Block::Html {
1940                                lines: html_lines,
1941                                has_preceding_blank: _,
1942                            } => {
1943                                // Preserve HTML blocks exactly as-is with original indentation
1944                                // NOTE: Blank line before HTML block is handled by the previous block
1945
1946                                for (idx, line) in html_lines.iter().enumerate() {
1947                                    if is_first_block && idx == 0 {
1948                                        // First line of first block gets marker
1949                                        result.push(format!("{marker}{line}"));
1950                                        is_first_block = false;
1951                                    } else if line.is_empty() {
1952                                        // Preserve blank lines inside HTML blocks
1953                                        result.push(String::new());
1954                                    } else {
1955                                        // Preserve lines with their original content (already includes indentation)
1956                                        result.push(format!("{expected_indent}{line}"));
1957                                    }
1958                                }
1959
1960                                // Add blank line after HTML block if there's a next block.
1961                                // Only add if not already ending with one (avoids double blanks
1962                                // when the HTML block itself contained a trailing blank line).
1963                                if block_idx < blocks.len() - 1 {
1964                                    let next_block = &blocks[block_idx + 1];
1965                                    let should_add_blank = match next_block {
1966                                        Block::Code {
1967                                            has_preceding_blank, ..
1968                                        } => *has_preceding_blank,
1969                                        Block::Html {
1970                                            has_preceding_blank, ..
1971                                        } => *has_preceding_blank,
1972                                        Block::SnippetLine(_) | Block::DivMarker(_) => false,
1973                                        _ => true, // For all other blocks, add blank line
1974                                    };
1975                                    if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1976                                    {
1977                                        result.push(String::new());
1978                                    }
1979                                }
1980                            }
1981                        }
1982                    }
1983
1984                    let reflowed_text = result.join(line_ending);
1985
1986                    // Preserve trailing newline
1987                    let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1988                        format!("{reflowed_text}{line_ending}")
1989                    } else {
1990                        reflowed_text
1991                    };
1992
1993                    // Get the original text to compare
1994                    let original_text = &ctx.content[byte_range.clone()];
1995
1996                    // Only generate a warning if the replacement is different from the original
1997                    if original_text != replacement {
1998                        // Generate an appropriate message based on why reflow is needed
1999                        let message = match config.reflow_mode {
2000                            ReflowMode::SentencePerLine => {
2001                                let num_sentences = split_into_sentences(&combined_content).len();
2002                                let num_lines = content_lines.len();
2003                                if num_lines == 1 {
2004                                    // Single line with multiple sentences
2005                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
2006                                } else {
2007                                    // Multiple lines - could be split sentences or mixed
2008                                    format!(
2009                                        "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
2010                                    )
2011                                }
2012                            }
2013                            ReflowMode::SemanticLineBreaks => {
2014                                let num_sentences = split_into_sentences(&combined_content).len();
2015                                format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
2016                            }
2017                            ReflowMode::Normalize => {
2018                                // Find the longest non-exempt paragraph when joined
2019                                let max_para_length = blocks
2020                                    .iter()
2021                                    .filter_map(|block| {
2022                                        if let Block::Paragraph(para_lines) = block {
2023                                            if para_lines.iter().all(|line| is_exempt_line(line)) {
2024                                                return None;
2025                                            }
2026                                            let joined = para_lines.join(" ");
2027                                            let with_indent = format!("{}{}", " ".repeat(marker_len), joined.trim());
2028                                            Some(self.calculate_effective_length(&with_indent))
2029                                        } else {
2030                                            None
2031                                        }
2032                                    })
2033                                    .max()
2034                                    .unwrap_or(0);
2035                                if max_para_length > config.line_length.get() {
2036                                    format!(
2037                                        "Line length {} exceeds {} characters",
2038                                        max_para_length,
2039                                        config.line_length.get()
2040                                    )
2041                                } else {
2042                                    "Multi-line content can be normalized".to_string()
2043                                }
2044                            }
2045                            ReflowMode::Default => {
2046                                // Report the actual longest non-exempt line, not the combined content
2047                                let max_length = (list_start..i)
2048                                    .filter(|&line_idx| {
2049                                        let line = lines[line_idx];
2050                                        let trimmed = line.trim();
2051                                        !trimmed.is_empty() && !is_exempt_line(line)
2052                                    })
2053                                    .map(|line_idx| self.calculate_effective_length(lines[line_idx]))
2054                                    .max()
2055                                    .unwrap_or(0);
2056                                format!(
2057                                    "Line length {} exceeds {} characters",
2058                                    max_length,
2059                                    config.line_length.get()
2060                                )
2061                            }
2062                        };
2063
2064                        warnings.push(LintWarning {
2065                            rule_name: Some(self.name().to_string()),
2066                            message,
2067                            line: list_start + 1,
2068                            column: 1,
2069                            end_line: end_line + 1,
2070                            end_column: lines[end_line].len() + 1,
2071                            severity: Severity::Warning,
2072                            fix: Some(crate::rule::Fix {
2073                                range: byte_range,
2074                                replacement,
2075                            }),
2076                        });
2077                    }
2078                }
2079                continue;
2080            }
2081
2082            // Found start of a paragraph - collect all lines in it
2083            let paragraph_start = i;
2084            let mut paragraph_lines = vec![lines[i]];
2085            i += 1;
2086
2087            while i < lines.len() {
2088                let next_line = lines[i];
2089                let next_line_num = i + 1;
2090                let next_trimmed = next_line.trim();
2091
2092                // Stop at paragraph boundaries
2093                if next_trimmed.is_empty()
2094                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
2095                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
2096                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
2097                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
2098                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
2099                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
2100                    || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
2101                    || ctx
2102                        .line_info(next_line_num)
2103                        .is_some_and(|info| info.in_mkdocs_container())
2104                    || (next_line_num > 0
2105                        && next_line_num <= ctx.lines.len()
2106                        && ctx.lines[next_line_num - 1].blockquote.is_some())
2107                    || next_trimmed.starts_with('#')
2108                    || TableUtils::is_potential_table_row(next_line)
2109                    || is_list_item(next_trimmed)
2110                    || is_horizontal_rule(next_trimmed)
2111                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
2112                    || is_template_directive_only(next_line)
2113                    || is_standalone_attr_list(next_line)
2114                    || is_snippet_block_delimiter(next_line)
2115                    || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
2116                {
2117                    break;
2118                }
2119
2120                // Check if the previous line ends with a hard break (2+ spaces or backslash)
2121                if i > 0 && has_hard_break(lines[i - 1]) {
2122                    // Don't include lines after hard breaks in the same paragraph
2123                    break;
2124                }
2125
2126                paragraph_lines.push(next_line);
2127                i += 1;
2128            }
2129
2130            // Combine paragraph lines into a single string for processing
2131            // This must be done BEFORE the needs_reflow check for sentence-per-line mode
2132            let paragraph_text = paragraph_lines.join(" ");
2133
2134            // Skip reflowing if this paragraph contains definition list items
2135            // Definition lists are multi-line structures that should not be joined
2136            let contains_definition_list = paragraph_lines
2137                .iter()
2138                .any(|line| crate::utils::is_definition_list_item(line));
2139
2140            if contains_definition_list {
2141                // Don't reflow definition lists - skip this paragraph
2142                i = paragraph_start + paragraph_lines.len();
2143                continue;
2144            }
2145
2146            // Skip reflowing if this paragraph contains MkDocs Snippets markers
2147            // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2148            let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2149
2150            if contains_snippets {
2151                // Don't reflow Snippets blocks - skip this paragraph
2152                i = paragraph_start + paragraph_lines.len();
2153                continue;
2154            }
2155
2156            // Check if this paragraph needs reflowing
2157            let needs_reflow = match config.reflow_mode {
2158                ReflowMode::Normalize => {
2159                    // In normalize mode, reflow multi-line paragraphs
2160                    paragraph_lines.len() > 1
2161                }
2162                ReflowMode::SentencePerLine => {
2163                    // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
2164                    // Note: we check the joined text because sentences can span multiple lines
2165                    let sentences = split_into_sentences(&paragraph_text);
2166
2167                    // Always reflow if multiple sentences on one line
2168                    if sentences.len() > 1 {
2169                        true
2170                    } else if paragraph_lines.len() > 1 {
2171                        // For single-sentence paragraphs spanning multiple lines:
2172                        // Reflow if they COULD fit on one line (respecting line-length constraint)
2173                        if config.line_length.is_unlimited() {
2174                            // No line-length constraint - always join single sentences
2175                            true
2176                        } else {
2177                            // Only join if it fits within line-length
2178                            let effective_length = self.calculate_effective_length(&paragraph_text);
2179                            effective_length <= config.line_length.get()
2180                        }
2181                    } else {
2182                        false
2183                    }
2184                }
2185                ReflowMode::SemanticLineBreaks => {
2186                    let sentences = split_into_sentences(&paragraph_text);
2187                    // Reflow if multiple sentences, multiple lines, or any line exceeds limit
2188                    sentences.len() > 1
2189                        || paragraph_lines.len() > 1
2190                        || paragraph_lines
2191                            .iter()
2192                            .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2193                }
2194                ReflowMode::Default => {
2195                    // In default mode, only reflow if lines exceed limit
2196                    paragraph_lines
2197                        .iter()
2198                        .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2199                }
2200            };
2201
2202            if needs_reflow {
2203                // Calculate byte range for this paragraph
2204                // Use whole_line_range for each line and combine
2205                let start_range = line_index.whole_line_range(paragraph_start + 1);
2206                let end_line = paragraph_start + paragraph_lines.len() - 1;
2207
2208                // For the last line, we want to preserve any trailing newline
2209                let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2210                    // Last line without trailing newline - use line_text_range
2211                    line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2212                } else {
2213                    // Not the last line or has trailing newline - use whole_line_range
2214                    line_index.whole_line_range(end_line + 1)
2215                };
2216
2217                let byte_range = start_range.start..end_range.end;
2218
2219                // Check if the paragraph ends with a hard break and what type
2220                let hard_break_type = paragraph_lines.last().and_then(|line| {
2221                    let line = line.strip_suffix('\r').unwrap_or(line);
2222                    if line.ends_with('\\') {
2223                        Some("\\")
2224                    } else if line.ends_with("  ") {
2225                        Some("  ")
2226                    } else {
2227                        None
2228                    }
2229                });
2230
2231                // Reflow the paragraph
2232                // When line_length = 0 (no limit), use a very large value for reflow
2233                let reflow_line_length = if config.line_length.is_unlimited() {
2234                    usize::MAX
2235                } else {
2236                    config.line_length.get()
2237                };
2238                let reflow_options = crate::utils::text_reflow::ReflowOptions {
2239                    line_length: reflow_line_length,
2240                    break_on_sentences: true,
2241                    preserve_breaks: false,
2242                    sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2243                    semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2244                    abbreviations: config.abbreviations_for_reflow(),
2245                    length_mode: self.reflow_length_mode(),
2246                };
2247                let mut reflowed = crate::utils::text_reflow::reflow_line(&paragraph_text, &reflow_options);
2248
2249                // If the original paragraph ended with a hard break, preserve it
2250                // Preserve the original hard break format (backslash or two spaces)
2251                if let Some(break_marker) = hard_break_type
2252                    && !reflowed.is_empty()
2253                {
2254                    let last_idx = reflowed.len() - 1;
2255                    if !has_hard_break(&reflowed[last_idx]) {
2256                        reflowed[last_idx].push_str(break_marker);
2257                    }
2258                }
2259
2260                let reflowed_text = reflowed.join(line_ending);
2261
2262                // Preserve trailing newline if the original paragraph had one
2263                let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2264                    format!("{reflowed_text}{line_ending}")
2265                } else {
2266                    reflowed_text
2267                };
2268
2269                // Get the original text to compare
2270                let original_text = &ctx.content[byte_range.clone()];
2271
2272                // Only generate a warning if the replacement is different from the original
2273                if original_text != replacement {
2274                    // Create warning with actual fix
2275                    // In default mode, report the specific line that violates
2276                    // In normalize mode, report the whole paragraph
2277                    // In sentence-per-line mode, report the entire paragraph
2278                    let (warning_line, warning_end_line) = match config.reflow_mode {
2279                        ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
2280                        ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
2281                            // Highlight the entire paragraph that needs reformatting
2282                            (paragraph_start + 1, paragraph_start + paragraph_lines.len())
2283                        }
2284                        ReflowMode::Default => {
2285                            // Find the first line that exceeds the limit
2286                            let mut violating_line = paragraph_start;
2287                            for (idx, line) in paragraph_lines.iter().enumerate() {
2288                                if self.calculate_effective_length(line) > config.line_length.get() {
2289                                    violating_line = paragraph_start + idx;
2290                                    break;
2291                                }
2292                            }
2293                            (violating_line + 1, violating_line + 1)
2294                        }
2295                    };
2296
2297                    warnings.push(LintWarning {
2298                        rule_name: Some(self.name().to_string()),
2299                        message: match config.reflow_mode {
2300                            ReflowMode::Normalize => format!(
2301                                "Paragraph could be normalized to use line length of {} characters",
2302                                config.line_length.get()
2303                            ),
2304                            ReflowMode::SentencePerLine => {
2305                                let num_sentences = split_into_sentences(&paragraph_text).len();
2306                                if paragraph_lines.len() == 1 {
2307                                    // Single line with multiple sentences
2308                                    format!("Line contains {num_sentences} sentences (one sentence per line required)")
2309                                } else {
2310                                    let num_lines = paragraph_lines.len();
2311                                    // Multiple lines - could be split sentences or mixed
2312                                    format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
2313                                }
2314                            },
2315                            ReflowMode::SemanticLineBreaks => {
2316                                let num_sentences = split_into_sentences(&paragraph_text).len();
2317                                format!(
2318                                    "Paragraph should use semantic line breaks ({num_sentences} sentences)"
2319                                )
2320                            },
2321                            ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
2322                        },
2323                        line: warning_line,
2324                        column: 1,
2325                        end_line: warning_end_line,
2326                        end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
2327                        severity: Severity::Warning,
2328                        fix: Some(crate::rule::Fix {
2329                            range: byte_range,
2330                            replacement,
2331                        }),
2332                    });
2333                }
2334            }
2335        }
2336
2337        warnings
2338    }
2339
2340    /// Calculate string length based on the configured length mode
2341    fn calculate_string_length(&self, s: &str) -> usize {
2342        match self.config.length_mode {
2343            LengthMode::Chars => s.chars().count(),
2344            LengthMode::Visual => s.width(),
2345            LengthMode::Bytes => s.len(),
2346        }
2347    }
2348
2349    /// Calculate effective line length
2350    ///
2351    /// Returns the actual display length of the line using the configured length mode.
2352    fn calculate_effective_length(&self, line: &str) -> usize {
2353        self.calculate_string_length(line)
2354    }
2355
2356    /// Calculate line length with inline link/image URLs removed.
2357    ///
2358    /// For each inline link `[text](url)` or image `![alt](url)` on the line,
2359    /// computes the "savings" from removing the URL portion (keeping only `[text]`
2360    /// or `![alt]`). Returns `effective_length - total_savings`.
2361    ///
2362    /// Handles nested constructs (e.g., `[![img](url)](url)`) by only counting the
2363    /// outermost construct to avoid double-counting.
2364    fn calculate_text_only_length(
2365        &self,
2366        effective_length: usize,
2367        line_number: usize,
2368        ctx: &crate::lint_context::LintContext,
2369    ) -> usize {
2370        let line_range = ctx.line_index.line_content_range(line_number);
2371        let line_byte_end = line_range.end;
2372
2373        // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
2374        let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
2375
2376        for link in &ctx.links {
2377            if link.line != line_number || link.is_reference {
2378                continue;
2379            }
2380            if !matches!(link.link_type, LinkType::Inline) {
2381                continue;
2382            }
2383            // Skip cross-line links
2384            if link.byte_end > line_byte_end {
2385                continue;
2386            }
2387            // `[text]` in configured length mode
2388            let text_only_len = 2 + self.calculate_string_length(&link.text);
2389            constructs.push((link.byte_offset, link.byte_end, text_only_len));
2390        }
2391
2392        for image in &ctx.images {
2393            if image.line != line_number || image.is_reference {
2394                continue;
2395            }
2396            if !matches!(image.link_type, LinkType::Inline) {
2397                continue;
2398            }
2399            // Skip cross-line images
2400            if image.byte_end > line_byte_end {
2401                continue;
2402            }
2403            // `![alt]` in configured length mode
2404            let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
2405            constructs.push((image.byte_offset, image.byte_end, text_only_len));
2406        }
2407
2408        if constructs.is_empty() {
2409            return effective_length;
2410        }
2411
2412        // Sort by byte offset to handle overlapping/nested constructs
2413        constructs.sort_by_key(|&(start, _, _)| start);
2414
2415        let mut total_savings: usize = 0;
2416        let mut last_end: usize = 0;
2417
2418        for (start, end, text_only_len) in &constructs {
2419            // Skip constructs nested inside a previously counted one
2420            if *start < last_end {
2421                continue;
2422            }
2423            // Full construct length in configured length mode
2424            let full_source = &ctx.content[*start..*end];
2425            let full_len = self.calculate_string_length(full_source);
2426            total_savings += full_len.saturating_sub(*text_only_len);
2427            last_end = *end;
2428        }
2429
2430        effective_length.saturating_sub(total_savings)
2431    }
2432}
rumdl_lib/rules/md013_line_length/mod.rs

rumdl_lib/rules/md013_line_length/
mod.rs