rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
7use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
8use crate::utils::range_utils::LineIndex;
9use crate::utils::range_utils::calculate_excess_range;
10use crate::utils::regex_cache::{
11 IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
12};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::split_into_sentences;
15use toml;
16
17mod helpers;
18pub mod md013_config;
19use helpers::{
20 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
21 split_into_segments, trim_preserving_hard_break,
22};
23pub use md013_config::MD013Config;
24use md013_config::{LengthMode, ReflowMode};
25
26#[cfg(test)]
27mod tests;
28use unicode_width::UnicodeWidthStr;
29
/// Rule MD013: flags lines that exceed a configured maximum length.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings controlling the length limit, which element types are checked, strictness, and reflow.
    pub(crate) config: MD013Config,
}
34
35impl MD013LineLength {
36 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
37 Self {
38 config: MD013Config {
39 line_length: crate::types::LineLength::new(line_length),
40 code_blocks,
41 tables,
42 headings,
43 paragraphs: true, // Default to true for backwards compatibility
44 strict,
45 reflow: false,
46 reflow_mode: ReflowMode::default(),
47 length_mode: LengthMode::default(),
48 abbreviations: Vec::new(),
49 },
50 }
51 }
52
53 pub fn from_config_struct(config: MD013Config) -> Self {
54 Self { config }
55 }
56
57 fn should_ignore_line(
58 &self,
59 line: &str,
60 _lines: &[&str],
61 current_line: usize,
62 ctx: &crate::lint_context::LintContext,
63 ) -> bool {
64 if self.config.strict {
65 return false;
66 }
67
68 // Quick check for common patterns before expensive regex
69 let trimmed = line.trim();
70
71 // Only skip if the entire line is a URL (quick check first)
72 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
73 return true;
74 }
75
76 // Only skip if the entire line is an image reference (quick check first)
77 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
78 return true;
79 }
80
81 // Only skip if the entire line is a link reference (quick check first)
82 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
83 return true;
84 }
85
86 // Code blocks with long strings (only check if in code block)
87 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
88 && !trimmed.is_empty()
89 && !line.contains(' ')
90 && !line.contains('\t')
91 {
92 return true;
93 }
94
95 false
96 }
97
98 /// Check if rule should skip based on provided config (used for inline config support)
99 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
100 // Skip if content is empty
101 if ctx.content.is_empty() {
102 return true;
103 }
104
105 // For sentence-per-line or normalize mode, never skip based on line length
106 if config.reflow
107 && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
108 {
109 return false;
110 }
111
112 // Quick check: if total content is shorter than line limit, definitely skip
113 if ctx.content.len() <= config.line_length.get() {
114 return true;
115 }
116
117 // Skip if no line exceeds the limit
118 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
119 }
120}
121
122impl Rule for MD013LineLength {
    /// Returns the rule identifier, `"MD013"`.
    fn name(&self) -> &'static str {
        "MD013"
    }
126
    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Line length should not be excessive"
    }
130
131 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
132 let content = ctx.content;
133
134 // Parse inline configuration FIRST so we can use effective config for should_skip
135 let inline_config = crate::inline_config::InlineConfig::from_content(content);
136 let config_override = inline_config.get_rule_config("MD013");
137
138 // Apply configuration override if present
139 let effective_config = if let Some(json_config) = config_override {
140 if let Some(obj) = json_config.as_object() {
141 let mut config = self.config.clone();
142 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
143 config.line_length = crate::types::LineLength::new(line_length as usize);
144 }
145 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
146 config.code_blocks = code_blocks;
147 }
148 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
149 config.tables = tables;
150 }
151 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
152 config.headings = headings;
153 }
154 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
155 config.strict = strict;
156 }
157 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
158 config.reflow = reflow;
159 }
160 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
161 config.reflow_mode = match reflow_mode {
162 "default" => ReflowMode::Default,
163 "normalize" => ReflowMode::Normalize,
164 "sentence-per-line" => ReflowMode::SentencePerLine,
165 _ => ReflowMode::default(),
166 };
167 }
168 config
169 } else {
170 self.config.clone()
171 }
172 } else {
173 self.config.clone()
174 };
175
176 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
177 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
178 if self.should_skip_with_config(ctx, &effective_config)
179 && !(effective_config.reflow
180 && (effective_config.reflow_mode == ReflowMode::Normalize
181 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
182 {
183 return Ok(Vec::new());
184 }
185
186 // Direct implementation without DocumentStructure
187 let mut warnings = Vec::new();
188
189 // Special handling: line_length = 0 means "no line length limit"
190 // Skip all line length checks, but still allow reflow if enabled
191 let skip_length_checks = effective_config.line_length.is_unlimited();
192
193 // Pre-filter lines that could be problematic to avoid processing all lines
194 let mut candidate_lines = Vec::new();
195 if !skip_length_checks {
196 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
197 // Skip front matter - it should never be linted
198 if line_info.in_front_matter {
199 continue;
200 }
201
202 // Quick length check first
203 if line_info.byte_len > effective_config.line_length.get() {
204 candidate_lines.push(line_idx);
205 }
206 }
207 }
208
209 // If no candidate lines and not in normalize or sentence-per-line mode, early return
210 if candidate_lines.is_empty()
211 && !(effective_config.reflow
212 && (effective_config.reflow_mode == ReflowMode::Normalize
213 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
214 {
215 return Ok(warnings);
216 }
217
218 // Use ctx.lines if available for better performance
219 let lines: Vec<&str> = if !ctx.lines.is_empty() {
220 ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
221 } else {
222 content.lines().collect()
223 };
224
225 // Create a quick lookup set for heading lines
226 // We need this for both the heading skip check AND the paragraphs check
227 let heading_lines_set: std::collections::HashSet<usize> = ctx
228 .lines
229 .iter()
230 .enumerate()
231 .filter(|(_, line)| line.heading.is_some())
232 .map(|(idx, _)| idx + 1)
233 .collect();
234
235 // Use pre-computed table blocks from context
236 // We need this for both the table skip check AND the paragraphs check
237 let table_blocks = &ctx.table_blocks;
238 let mut table_lines_set = std::collections::HashSet::new();
239 for table in table_blocks {
240 table_lines_set.insert(table.header_line + 1);
241 table_lines_set.insert(table.delimiter_line + 1);
242 for &line in &table.content_lines {
243 table_lines_set.insert(line + 1);
244 }
245 }
246
247 // Process candidate lines for line length checks
248 for &line_idx in &candidate_lines {
249 let line_number = line_idx + 1;
250 let line = lines[line_idx];
251
252 // Calculate effective length excluding unbreakable URLs
253 let effective_length = self.calculate_effective_length(line);
254
255 // Use single line length limit for all content
256 let line_limit = effective_config.line_length.get();
257
258 // Skip short lines immediately (double-check after effective length calculation)
259 if effective_length <= line_limit {
260 continue;
261 }
262
263 // Skip mkdocstrings blocks (already handled by LintContext)
264 if ctx.lines[line_idx].in_mkdocstrings {
265 continue;
266 }
267
268 // Skip various block types efficiently
269 if !effective_config.strict {
270 // Skip setext heading underlines
271 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
272 continue;
273 }
274
275 // Skip block elements according to config flags
276 // The flags mean: true = check these elements, false = skip these elements
277 // So we skip when the flag is FALSE and the line is in that element type
278 if (!effective_config.headings && heading_lines_set.contains(&line_number))
279 || (!effective_config.code_blocks
280 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
281 || (!effective_config.tables && table_lines_set.contains(&line_number))
282 || ctx.lines[line_number - 1].blockquote.is_some()
283 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
284 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
285 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
286 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
287 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
288 {
289 continue;
290 }
291
292 // Check if this is a paragraph/regular text line
293 // If paragraphs = false, skip lines that are NOT in special blocks
294 if !effective_config.paragraphs {
295 let is_special_block = heading_lines_set.contains(&line_number)
296 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
297 || table_lines_set.contains(&line_number)
298 || ctx.lines[line_number - 1].blockquote.is_some()
299 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
300 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
301 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
302 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
303 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment);
304
305 // Skip regular paragraph text when paragraphs = false
306 if !is_special_block {
307 continue;
308 }
309 }
310
311 // Skip lines that are only a URL, image ref, or link ref
312 if self.should_ignore_line(line, &lines, line_idx, ctx) {
313 continue;
314 }
315 }
316
317 // In sentence-per-line mode, check if this is a single long sentence
318 // If so, emit a warning without a fix (user must manually rephrase)
319 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
320 let sentences = split_into_sentences(line.trim());
321 if sentences.len() == 1 {
322 // Single sentence that's too long - warn but don't auto-fix
323 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
324
325 let (start_line, start_col, end_line, end_col) =
326 calculate_excess_range(line_number, line, line_limit);
327
328 warnings.push(LintWarning {
329 rule_name: Some(self.name().to_string()),
330 message,
331 line: start_line,
332 column: start_col,
333 end_line,
334 end_column: end_col,
335 severity: Severity::Warning,
336 fix: None, // No auto-fix for long single sentences
337 });
338 continue;
339 }
340 // Multiple sentences will be handled by paragraph-based reflow
341 continue;
342 }
343
344 // Don't provide fix for individual lines when reflow is enabled
345 // Paragraph-based fixes will be handled separately
346 let fix = None;
347
348 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
349
350 // Calculate precise character range for the excess portion
351 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
352
353 warnings.push(LintWarning {
354 rule_name: Some(self.name().to_string()),
355 message,
356 line: start_line,
357 column: start_col,
358 end_line,
359 end_column: end_col,
360 severity: Severity::Warning,
361 fix,
362 });
363 }
364
365 // If reflow is enabled, generate paragraph-based fixes
366 if effective_config.reflow {
367 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
368 // Merge paragraph warnings with line warnings, removing duplicates
369 for pw in paragraph_warnings {
370 // Remove any line warnings that overlap with this paragraph
371 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
372 warnings.push(pw);
373 }
374 }
375
376 Ok(warnings)
377 }
378
379 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
380 // For CLI usage, apply fixes from warnings
381 // LSP will use the warning-based fixes directly
382 let warnings = self.check(ctx)?;
383
384 // If there are no fixes, return content unchanged
385 if !warnings.iter().any(|w| w.fix.is_some()) {
386 return Ok(ctx.content.to_string());
387 }
388
389 // Apply warning-based fixes
390 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
391 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
392 }
393
    /// Exposes the rule as `&dyn Any` (presumably so callers can downcast to the concrete rule type).
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
397
    /// Classifies this rule under `RuleCategory::Whitespace`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
401
    /// Pre-check based on the rule's own configuration only; inline overrides in the document
    /// are not consulted here.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        self.should_skip_with_config(ctx, &self.config)
    }
405
406 fn default_config_section(&self) -> Option<(String, toml::Value)> {
407 let default_config = MD013Config::default();
408 let json_value = serde_json::to_value(&default_config).ok()?;
409 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
410
411 if let toml::Value::Table(table) = toml_value {
412 if !table.is_empty() {
413 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
414 } else {
415 None
416 }
417 } else {
418 None
419 }
420 }
421
422 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
423 let mut aliases = std::collections::HashMap::new();
424 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
425 Some(aliases)
426 }
427
428 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
429 where
430 Self: Sized,
431 {
432 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
433 // Use global line_length if rule-specific config still has default value
434 if rule_config.line_length.get() == 80 {
435 rule_config.line_length = config.global.line_length;
436 }
437 Box::new(Self::from_config_struct(rule_config))
438 }
439}
440
441impl MD013LineLength {
442 /// Generate paragraph-based fixes
443 fn generate_paragraph_fixes(
444 &self,
445 ctx: &crate::lint_context::LintContext,
446 config: &MD013Config,
447 lines: &[&str],
448 ) -> Vec<LintWarning> {
449 let mut warnings = Vec::new();
450 let line_index = LineIndex::new(ctx.content);
451
452 let mut i = 0;
453 while i < lines.len() {
454 let line_num = i + 1;
455
456 // Skip special structures
457 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
458 info.in_code_block
459 || info.in_front_matter
460 || info.in_html_block
461 || info.in_html_comment
462 || info.in_esm_block
463 || info.in_jsx_expression
464 || info.in_mdx_comment
465 });
466
467 if should_skip_due_to_line_info
468 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
469 || lines[i].trim().starts_with('#')
470 || TableUtils::is_potential_table_row(lines[i])
471 || lines[i].trim().is_empty()
472 || is_horizontal_rule(lines[i].trim())
473 || is_template_directive_only(lines[i])
474 {
475 i += 1;
476 continue;
477 }
478
479 // Helper function to detect semantic line markers
480 let is_semantic_line = |content: &str| -> bool {
481 let trimmed = content.trim_start();
482 let semantic_markers = [
483 "NOTE:",
484 "WARNING:",
485 "IMPORTANT:",
486 "CAUTION:",
487 "TIP:",
488 "DANGER:",
489 "HINT:",
490 "INFO:",
491 ];
492 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
493 };
494
495 // Helper function to detect fence markers (opening or closing)
496 let is_fence_marker = |content: &str| -> bool {
497 let trimmed = content.trim_start();
498 trimmed.starts_with("```") || trimmed.starts_with("~~~")
499 };
500
501 // Check if this is a list item - handle it specially
502 let trimmed = lines[i].trim();
503 if is_list_item(trimmed) {
504 // Collect the entire list item including continuation lines
505 let list_start = i;
506 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
507 let marker_len = marker.len();
508
509 // Track lines and their types (content, code block, fence, nested list)
510 #[derive(Clone)]
511 enum LineType {
512 Content(String),
513 CodeBlock(String, usize), // content and original indent
514 NestedListItem(String, usize), // full line content and original indent
515 SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
516 SnippetLine(String), // MkDocs Snippets delimiters (-8<-) that must stay on their own line
517 Empty,
518 }
519
520 let mut actual_indent: Option<usize> = None;
521 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
522 i += 1;
523
524 // Collect continuation lines using ctx.lines for metadata
525 while i < lines.len() {
526 let line_info = &ctx.lines[i];
527
528 // Use pre-computed is_blank from ctx
529 if line_info.is_blank {
530 // Empty line - check if next line is indented (part of list item)
531 if i + 1 < lines.len() {
532 let next_info = &ctx.lines[i + 1];
533
534 // Check if next line is indented enough to be continuation
535 if !next_info.is_blank && next_info.indent >= marker_len {
536 // This blank line is between paragraphs/blocks in the list item
537 list_item_lines.push(LineType::Empty);
538 i += 1;
539 continue;
540 }
541 }
542 // No indented line after blank, end of list item
543 break;
544 }
545
546 // Use pre-computed indent from ctx
547 let indent = line_info.indent;
548
549 // Valid continuation must be indented at least marker_len
550 if indent >= marker_len {
551 let trimmed = line_info.content(ctx.content).trim();
552
553 // Use pre-computed in_code_block from ctx
554 if line_info.in_code_block {
555 list_item_lines.push(LineType::CodeBlock(
556 line_info.content(ctx.content)[indent..].to_string(),
557 indent,
558 ));
559 i += 1;
560 continue;
561 }
562
563 // Check if this is a SIBLING list item (breaks parent)
564 // Nested lists are indented >= marker_len and are PART of the parent item
565 // Siblings are at indent < marker_len (at or before parent marker)
566 if is_list_item(trimmed) && indent < marker_len {
567 // This is a sibling item at same or higher level - end parent item
568 break;
569 }
570
571 // Check if this is a NESTED list item marker
572 // Nested lists should be processed separately UNLESS they're part of a
573 // multi-paragraph list item (indicated by a blank line before them OR
574 // it's a continuation of an already-started nested list)
575 if is_list_item(trimmed) && indent >= marker_len {
576 // Check if there was a blank line before this (multi-paragraph context)
577 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
578
579 // Check if we've already seen nested list content (another nested item)
580 let has_nested_content = list_item_lines.iter().any(|line| {
581 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
582 || matches!(line, LineType::NestedListItem(_, _))
583 });
584
585 if !has_blank_before && !has_nested_content {
586 // Single-paragraph context with no prior nested items: starts a new item
587 // End parent collection; nested list will be processed next
588 break;
589 }
590 // else: multi-paragraph context or continuation of nested list, keep collecting
591 // Mark this as a nested list item to preserve its structure
592 list_item_lines.push(LineType::NestedListItem(
593 line_info.content(ctx.content)[indent..].to_string(),
594 indent,
595 ));
596 i += 1;
597 continue;
598 }
599
600 // Normal continuation: marker_len to marker_len+3
601 if indent <= marker_len + 3 {
602 // Set actual_indent from first non-code continuation if not set
603 if actual_indent.is_none() {
604 actual_indent = Some(indent);
605 }
606
607 // Extract content (remove indentation and trailing whitespace)
608 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
609 // See: https://github.com/rvben/rumdl/issues/76
610 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
611
612 // Check if this is a fence marker (opening or closing)
613 // These should be treated as code block lines, not paragraph content
614 if is_fence_marker(&content) {
615 list_item_lines.push(LineType::CodeBlock(content, indent));
616 }
617 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
618 else if is_semantic_line(&content) {
619 list_item_lines.push(LineType::SemanticLine(content));
620 }
621 // Check if this is a snippet block delimiter (-8<- or --8<--)
622 // These must be preserved on their own lines for MkDocs Snippets extension
623 else if is_snippet_block_delimiter(&content) {
624 list_item_lines.push(LineType::SnippetLine(content));
625 } else {
626 list_item_lines.push(LineType::Content(content));
627 }
628 i += 1;
629 } else {
630 // indent >= marker_len + 4: indented code block
631 list_item_lines.push(LineType::CodeBlock(
632 line_info.content(ctx.content)[indent..].to_string(),
633 indent,
634 ));
635 i += 1;
636 }
637 } else {
638 // Not indented enough, end of list item
639 break;
640 }
641 }
642
643 // Use detected indent or fallback to marker length
644 let indent_size = actual_indent.unwrap_or(marker_len);
645 let expected_indent = " ".repeat(indent_size);
646
647 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
648 #[derive(Clone)]
649 enum Block {
650 Paragraph(Vec<String>),
651 Code {
652 lines: Vec<(String, usize)>, // (content, indent) pairs
653 has_preceding_blank: bool, // Whether there was a blank line before this block
654 },
655 NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
656 SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
657 SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
658 Html {
659 lines: Vec<String>, // HTML content preserved exactly as-is
660 has_preceding_blank: bool, // Whether there was a blank line before this block
661 },
662 }
663
664 // HTML tag detection helpers
665 // Block-level HTML tags that should trigger HTML block detection
666 const BLOCK_LEVEL_TAGS: &[&str] = &[
667 "div",
668 "details",
669 "summary",
670 "section",
671 "article",
672 "header",
673 "footer",
674 "nav",
675 "aside",
676 "main",
677 "table",
678 "thead",
679 "tbody",
680 "tfoot",
681 "tr",
682 "td",
683 "th",
684 "ul",
685 "ol",
686 "li",
687 "dl",
688 "dt",
689 "dd",
690 "pre",
691 "blockquote",
692 "figure",
693 "figcaption",
694 "form",
695 "fieldset",
696 "legend",
697 "hr",
698 "p",
699 "h1",
700 "h2",
701 "h3",
702 "h4",
703 "h5",
704 "h6",
705 "style",
706 "script",
707 "noscript",
708 ];
709
710 fn is_block_html_opening_tag(line: &str) -> Option<String> {
711 let trimmed = line.trim();
712
713 // Check for HTML comments
714 if trimmed.starts_with("<!--") {
715 return Some("!--".to_string());
716 }
717
718 // Check for opening tags
719 if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
720 // Extract tag name from <tagname ...> or <tagname>
721 let after_bracket = &trimmed[1..];
722 if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
723 let tag_name = after_bracket[..end].to_lowercase();
724
725 // Only treat as block if it's a known block-level tag
726 if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
727 return Some(tag_name);
728 }
729 }
730 }
731 None
732 }
733
734 fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
735 let trimmed = line.trim();
736
737 // Special handling for HTML comments
738 if tag_name == "!--" {
739 return trimmed.ends_with("-->");
740 }
741
742 // Check for closing tags: </tagname> or </tagname ...>
743 trimmed.starts_with(&format!("</{tag_name}>"))
744 || trimmed.starts_with(&format!("</{tag_name} "))
745 || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
746 }
747
748 fn is_self_closing_tag(line: &str) -> bool {
749 let trimmed = line.trim();
750 trimmed.ends_with("/>")
751 }
752
753 let mut blocks: Vec<Block> = Vec::new();
754 let mut current_paragraph: Vec<String> = Vec::new();
755 let mut current_code_block: Vec<(String, usize)> = Vec::new();
756 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
757 let mut current_html_block: Vec<String> = Vec::new();
758 let mut html_tag_stack: Vec<String> = Vec::new();
759 let mut in_code = false;
760 let mut in_nested_list = false;
761 let mut in_html_block = false;
762 let mut had_preceding_blank = false; // Track if we just saw an empty line
763 let mut code_block_has_preceding_blank = false; // Track blank before current code block
764 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
765
766 for line in &list_item_lines {
767 match line {
768 LineType::Empty => {
769 if in_code {
770 current_code_block.push((String::new(), 0));
771 } else if in_nested_list {
772 current_nested_list.push((String::new(), 0));
773 } else if in_html_block {
774 // Allow blank lines inside HTML blocks
775 current_html_block.push(String::new());
776 } else if !current_paragraph.is_empty() {
777 blocks.push(Block::Paragraph(current_paragraph.clone()));
778 current_paragraph.clear();
779 }
780 // Mark that we saw a blank line
781 had_preceding_blank = true;
782 }
783 LineType::Content(content) => {
784 // Check if we're currently in an HTML block
785 if in_html_block {
786 current_html_block.push(content.clone());
787
788 // Check if this line closes any open HTML tags
789 if let Some(last_tag) = html_tag_stack.last() {
790 if is_html_closing_tag(content, last_tag) {
791 html_tag_stack.pop();
792
793 // If stack is empty, HTML block is complete
794 if html_tag_stack.is_empty() {
795 blocks.push(Block::Html {
796 lines: current_html_block.clone(),
797 has_preceding_blank: html_block_has_preceding_blank,
798 });
799 current_html_block.clear();
800 in_html_block = false;
801 }
802 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
803 // Nested opening tag within HTML block
804 if !is_self_closing_tag(content) {
805 html_tag_stack.push(new_tag);
806 }
807 }
808 }
809 had_preceding_blank = false;
810 } else {
811 // Not in HTML block - check if this line starts one
812 if let Some(tag_name) = is_block_html_opening_tag(content) {
813 // Flush current paragraph before starting HTML block
814 if in_code {
815 blocks.push(Block::Code {
816 lines: current_code_block.clone(),
817 has_preceding_blank: code_block_has_preceding_blank,
818 });
819 current_code_block.clear();
820 in_code = false;
821 } else if in_nested_list {
822 blocks.push(Block::NestedList(current_nested_list.clone()));
823 current_nested_list.clear();
824 in_nested_list = false;
825 } else if !current_paragraph.is_empty() {
826 blocks.push(Block::Paragraph(current_paragraph.clone()));
827 current_paragraph.clear();
828 }
829
830 // Start new HTML block
831 in_html_block = true;
832 html_block_has_preceding_blank = had_preceding_blank;
833 current_html_block.push(content.clone());
834
835 // Check if it's self-closing or needs a closing tag
836 if is_self_closing_tag(content) {
837 // Self-closing tag - complete the HTML block immediately
838 blocks.push(Block::Html {
839 lines: current_html_block.clone(),
840 has_preceding_blank: html_block_has_preceding_blank,
841 });
842 current_html_block.clear();
843 in_html_block = false;
844 } else {
845 // Regular opening tag - push to stack
846 html_tag_stack.push(tag_name);
847 }
848 } else {
849 // Regular content line - add to paragraph
850 if in_code {
851 // Switching from code to content
852 blocks.push(Block::Code {
853 lines: current_code_block.clone(),
854 has_preceding_blank: code_block_has_preceding_blank,
855 });
856 current_code_block.clear();
857 in_code = false;
858 } else if in_nested_list {
859 // Switching from nested list to content
860 blocks.push(Block::NestedList(current_nested_list.clone()));
861 current_nested_list.clear();
862 in_nested_list = false;
863 }
864 current_paragraph.push(content.clone());
865 }
866 had_preceding_blank = false; // Reset after content
867 }
868 }
869 LineType::CodeBlock(content, indent) => {
870 if in_nested_list {
871 // Switching from nested list to code
872 blocks.push(Block::NestedList(current_nested_list.clone()));
873 current_nested_list.clear();
874 in_nested_list = false;
875 } else if in_html_block {
876 // Switching from HTML block to code (shouldn't happen normally, but handle it)
877 blocks.push(Block::Html {
878 lines: current_html_block.clone(),
879 has_preceding_blank: html_block_has_preceding_blank,
880 });
881 current_html_block.clear();
882 html_tag_stack.clear();
883 in_html_block = false;
884 }
885 if !in_code {
886 // Switching from content to code
887 if !current_paragraph.is_empty() {
888 blocks.push(Block::Paragraph(current_paragraph.clone()));
889 current_paragraph.clear();
890 }
891 in_code = true;
892 // Record whether there was a blank line before this code block
893 code_block_has_preceding_blank = had_preceding_blank;
894 }
895 current_code_block.push((content.clone(), *indent));
896 had_preceding_blank = false; // Reset after code
897 }
898 LineType::NestedListItem(content, indent) => {
899 if in_code {
900 // Switching from code to nested list
901 blocks.push(Block::Code {
902 lines: current_code_block.clone(),
903 has_preceding_blank: code_block_has_preceding_blank,
904 });
905 current_code_block.clear();
906 in_code = false;
907 } else if in_html_block {
908 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
909 blocks.push(Block::Html {
910 lines: current_html_block.clone(),
911 has_preceding_blank: html_block_has_preceding_blank,
912 });
913 current_html_block.clear();
914 html_tag_stack.clear();
915 in_html_block = false;
916 }
917 if !in_nested_list {
918 // Switching from content to nested list
919 if !current_paragraph.is_empty() {
920 blocks.push(Block::Paragraph(current_paragraph.clone()));
921 current_paragraph.clear();
922 }
923 in_nested_list = true;
924 }
925 current_nested_list.push((content.clone(), *indent));
926 had_preceding_blank = false; // Reset after nested list
927 }
928 LineType::SemanticLine(content) => {
929 // Semantic lines are standalone - flush any current block and add as separate block
930 if in_code {
931 blocks.push(Block::Code {
932 lines: current_code_block.clone(),
933 has_preceding_blank: code_block_has_preceding_blank,
934 });
935 current_code_block.clear();
936 in_code = false;
937 } else if in_nested_list {
938 blocks.push(Block::NestedList(current_nested_list.clone()));
939 current_nested_list.clear();
940 in_nested_list = false;
941 } else if in_html_block {
942 blocks.push(Block::Html {
943 lines: current_html_block.clone(),
944 has_preceding_blank: html_block_has_preceding_blank,
945 });
946 current_html_block.clear();
947 html_tag_stack.clear();
948 in_html_block = false;
949 } else if !current_paragraph.is_empty() {
950 blocks.push(Block::Paragraph(current_paragraph.clone()));
951 current_paragraph.clear();
952 }
953 // Add semantic line as its own block
954 blocks.push(Block::SemanticLine(content.clone()));
955 had_preceding_blank = false; // Reset after semantic line
956 }
957 LineType::SnippetLine(content) => {
958 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
959 // Unlike semantic lines, snippet lines don't add extra blank lines around them
960 if in_code {
961 blocks.push(Block::Code {
962 lines: current_code_block.clone(),
963 has_preceding_blank: code_block_has_preceding_blank,
964 });
965 current_code_block.clear();
966 in_code = false;
967 } else if in_nested_list {
968 blocks.push(Block::NestedList(current_nested_list.clone()));
969 current_nested_list.clear();
970 in_nested_list = false;
971 } else if in_html_block {
972 blocks.push(Block::Html {
973 lines: current_html_block.clone(),
974 has_preceding_blank: html_block_has_preceding_blank,
975 });
976 current_html_block.clear();
977 html_tag_stack.clear();
978 in_html_block = false;
979 } else if !current_paragraph.is_empty() {
980 blocks.push(Block::Paragraph(current_paragraph.clone()));
981 current_paragraph.clear();
982 }
983 // Add snippet line as its own block
984 blocks.push(Block::SnippetLine(content.clone()));
985 had_preceding_blank = false;
986 }
987 }
988 }
989
990 // Push remaining block
991 if in_code && !current_code_block.is_empty() {
992 blocks.push(Block::Code {
993 lines: current_code_block,
994 has_preceding_blank: code_block_has_preceding_blank,
995 });
996 } else if in_nested_list && !current_nested_list.is_empty() {
997 blocks.push(Block::NestedList(current_nested_list));
998 } else if in_html_block && !current_html_block.is_empty() {
999 // If we still have an unclosed HTML block, push it anyway
1000 // (malformed HTML - missing closing tag)
1001 blocks.push(Block::Html {
1002 lines: current_html_block,
1003 has_preceding_blank: html_block_has_preceding_blank,
1004 });
1005 } else if !current_paragraph.is_empty() {
1006 blocks.push(Block::Paragraph(current_paragraph));
1007 }
1008
1009 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1010 let content_lines: Vec<String> = list_item_lines
1011 .iter()
1012 .filter_map(|line| {
1013 if let LineType::Content(s) = line {
1014 Some(s.clone())
1015 } else {
1016 None
1017 }
1018 })
1019 .collect();
1020
1021 // Check if we need to reflow this list item
1022 // We check the combined content to see if it exceeds length limits
1023 let combined_content = content_lines.join(" ").trim().to_string();
1024 let full_line = format!("{marker}{combined_content}");
1025
1026 // Helper to check if we should reflow in normalize mode
1027 let should_normalize = || {
1028 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1029 // DO normalize if it has plain text content that spans multiple lines
1030 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1031 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1032 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1033 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1034 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1035
1036 // If we have nested lists, code blocks, semantic lines, or snippet lines but no paragraphs, don't normalize
1037 if (has_nested_lists || has_code_blocks || has_semantic_lines || has_snippet_lines)
1038 && !has_paragraphs
1039 {
1040 return false;
1041 }
1042
1043 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1044 if has_paragraphs {
1045 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1046 if paragraph_count > 1 {
1047 // Multiple paragraph blocks should be normalized
1048 return true;
1049 }
1050
1051 // Single paragraph block: normalize if it has multiple content lines
1052 if content_lines.len() > 1 {
1053 return true;
1054 }
1055 }
1056
1057 false
1058 };
1059
1060 let needs_reflow = match config.reflow_mode {
1061 ReflowMode::Normalize => {
1062 // Only reflow if:
1063 // 1. The combined line would exceed the limit, OR
1064 // 2. The list item should be normalized (has multi-line plain text)
1065 let combined_length = self.calculate_effective_length(&full_line);
1066 if combined_length > config.line_length.get() {
1067 true
1068 } else {
1069 should_normalize()
1070 }
1071 }
1072 ReflowMode::SentencePerLine => {
1073 // Check if list item has multiple sentences
1074 let sentences = split_into_sentences(&combined_content);
1075 sentences.len() > 1
1076 }
1077 ReflowMode::Default => {
1078 // In default mode, only reflow if any individual line exceeds limit
1079 // Check the original lines, not the combined content
1080 (list_start..i)
1081 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1082 }
1083 };
1084
1085 if needs_reflow {
1086 let start_range = line_index.whole_line_range(list_start + 1);
1087 let end_line = i - 1;
1088 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1089 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1090 } else {
1091 line_index.whole_line_range(end_line + 1)
1092 };
1093 let byte_range = start_range.start..end_range.end;
1094
1095 // Reflow each block (paragraphs only, preserve code blocks)
1096 // When line_length = 0 (no limit), use a very large value for reflow
1097 let reflow_line_length = if config.line_length.is_unlimited() {
1098 usize::MAX
1099 } else {
1100 config.line_length.get().saturating_sub(indent_size).max(1)
1101 };
1102 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1103 line_length: reflow_line_length,
1104 break_on_sentences: true,
1105 preserve_breaks: false,
1106 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1107 abbreviations: config.abbreviations_for_reflow(),
1108 };
1109
1110 let mut result: Vec<String> = Vec::new();
1111 let mut is_first_block = true;
1112
1113 for (block_idx, block) in blocks.iter().enumerate() {
1114 match block {
1115 Block::Paragraph(para_lines) => {
1116 // Split the paragraph into segments at hard break boundaries
1117 // Each segment can be reflowed independently
1118 let segments = split_into_segments(para_lines);
1119
1120 for (segment_idx, segment) in segments.iter().enumerate() {
1121 // Check if this segment ends with a hard break and what type
1122 let hard_break_type = segment.last().and_then(|line| {
1123 let line = line.strip_suffix('\r').unwrap_or(line);
1124 if line.ends_with('\\') {
1125 Some("\\")
1126 } else if line.ends_with(" ") {
1127 Some(" ")
1128 } else {
1129 None
1130 }
1131 });
1132
1133 // Join and reflow the segment (removing the hard break marker for processing)
1134 let segment_for_reflow: Vec<String> = segment
1135 .iter()
1136 .map(|line| {
1137 // Strip hard break marker (2 spaces or backslash) for reflow processing
1138 if line.ends_with('\\') {
1139 line[..line.len() - 1].trim_end().to_string()
1140 } else if line.ends_with(" ") {
1141 line[..line.len() - 2].trim_end().to_string()
1142 } else {
1143 line.clone()
1144 }
1145 })
1146 .collect();
1147
1148 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1149 if !segment_text.is_empty() {
1150 let reflowed =
1151 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1152
1153 if is_first_block && segment_idx == 0 {
1154 // First segment of first block starts with marker
1155 result.push(format!("{marker}{}", reflowed[0]));
1156 for line in reflowed.iter().skip(1) {
1157 result.push(format!("{expected_indent}{line}"));
1158 }
1159 is_first_block = false;
1160 } else {
1161 // Subsequent segments
1162 for line in reflowed {
1163 result.push(format!("{expected_indent}{line}"));
1164 }
1165 }
1166
1167 // If this segment had a hard break, add it back to the last line
1168 // Preserve the original hard break format (backslash or two spaces)
1169 if let Some(break_marker) = hard_break_type
1170 && let Some(last_line) = result.last_mut()
1171 {
1172 last_line.push_str(break_marker);
1173 }
1174 }
1175 }
1176
1177 // Add blank line after paragraph block if there's a next block
1178 // BUT: check if next block is a code block that doesn't want a preceding blank
1179 // Also don't add blank lines before snippet lines (they should stay tight)
1180 if block_idx < blocks.len() - 1 {
1181 let next_block = &blocks[block_idx + 1];
1182 let should_add_blank = match next_block {
1183 Block::Code {
1184 has_preceding_blank, ..
1185 } => *has_preceding_blank,
1186 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1187 _ => true, // For all other blocks, add blank line
1188 };
1189 if should_add_blank {
1190 result.push(String::new());
1191 }
1192 }
1193 }
1194 Block::Code {
1195 lines: code_lines,
1196 has_preceding_blank: _,
1197 } => {
1198 // Preserve code blocks as-is with original indentation
1199 // NOTE: Blank line before code block is handled by the previous block
1200 // (see paragraph block's logic above)
1201
1202 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1203 if is_first_block && idx == 0 {
1204 // First line of first block gets marker
1205 result.push(format!(
1206 "{marker}{}",
1207 " ".repeat(orig_indent - marker_len) + content
1208 ));
1209 is_first_block = false;
1210 } else if content.is_empty() {
1211 result.push(String::new());
1212 } else {
1213 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1214 }
1215 }
1216 }
1217 Block::NestedList(nested_items) => {
1218 // Preserve nested list items as-is with original indentation
1219 if !is_first_block {
1220 result.push(String::new());
1221 }
1222
1223 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1224 if is_first_block && idx == 0 {
1225 // First line of first block gets marker
1226 result.push(format!(
1227 "{marker}{}",
1228 " ".repeat(orig_indent - marker_len) + content
1229 ));
1230 is_first_block = false;
1231 } else if content.is_empty() {
1232 result.push(String::new());
1233 } else {
1234 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1235 }
1236 }
1237
1238 // Add blank line after nested list if there's a next block
1239 // Check if next block is a code block that doesn't want a preceding blank
1240 if block_idx < blocks.len() - 1 {
1241 let next_block = &blocks[block_idx + 1];
1242 let should_add_blank = match next_block {
1243 Block::Code {
1244 has_preceding_blank, ..
1245 } => *has_preceding_blank,
1246 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1247 _ => true, // For all other blocks, add blank line
1248 };
1249 if should_add_blank {
1250 result.push(String::new());
1251 }
1252 }
1253 }
1254 Block::SemanticLine(content) => {
1255 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1256 // Add blank line before if not first block
1257 if !is_first_block {
1258 result.push(String::new());
1259 }
1260
1261 if is_first_block {
1262 // First block starts with marker
1263 result.push(format!("{marker}{content}"));
1264 is_first_block = false;
1265 } else {
1266 // Subsequent blocks use expected indent
1267 result.push(format!("{expected_indent}{content}"));
1268 }
1269
1270 // Add blank line after semantic line if there's a next block
1271 // Check if next block is a code block that doesn't want a preceding blank
1272 if block_idx < blocks.len() - 1 {
1273 let next_block = &blocks[block_idx + 1];
1274 let should_add_blank = match next_block {
1275 Block::Code {
1276 has_preceding_blank, ..
1277 } => *has_preceding_blank,
1278 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1279 _ => true, // For all other blocks, add blank line
1280 };
1281 if should_add_blank {
1282 result.push(String::new());
1283 }
1284 }
1285 }
1286 Block::SnippetLine(content) => {
1287 // Preserve snippet delimiters (-8<-) as-is on their own line
1288 // Unlike semantic lines, snippet lines don't add extra blank lines
1289 if is_first_block {
1290 // First block starts with marker
1291 result.push(format!("{marker}{content}"));
1292 is_first_block = false;
1293 } else {
1294 // Subsequent blocks use expected indent
1295 result.push(format!("{expected_indent}{content}"));
1296 }
1297 // No blank lines added before or after snippet delimiters
1298 }
1299 Block::Html {
1300 lines: html_lines,
1301 has_preceding_blank: _,
1302 } => {
1303 // Preserve HTML blocks exactly as-is with original indentation
1304 // NOTE: Blank line before HTML block is handled by the previous block
1305
1306 for (idx, line) in html_lines.iter().enumerate() {
1307 if is_first_block && idx == 0 {
1308 // First line of first block gets marker
1309 result.push(format!("{marker}{line}"));
1310 is_first_block = false;
1311 } else if line.is_empty() {
1312 // Preserve blank lines inside HTML blocks
1313 result.push(String::new());
1314 } else {
1315 // Preserve lines with their original content (already includes indentation)
1316 result.push(format!("{expected_indent}{line}"));
1317 }
1318 }
1319
1320 // Add blank line after HTML block if there's a next block
1321 if block_idx < blocks.len() - 1 {
1322 let next_block = &blocks[block_idx + 1];
1323 let should_add_blank = match next_block {
1324 Block::Code {
1325 has_preceding_blank, ..
1326 } => *has_preceding_blank,
1327 Block::Html {
1328 has_preceding_blank, ..
1329 } => *has_preceding_blank,
1330 Block::SnippetLine(_) => false, // No blank line before snippet delimiters
1331 _ => true, // For all other blocks, add blank line
1332 };
1333 if should_add_blank {
1334 result.push(String::new());
1335 }
1336 }
1337 }
1338 }
1339 }
1340
1341 let reflowed_text = result.join("\n");
1342
1343 // Preserve trailing newline
1344 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1345 format!("{reflowed_text}\n")
1346 } else {
1347 reflowed_text
1348 };
1349
1350 // Get the original text to compare
1351 let original_text = &ctx.content[byte_range.clone()];
1352
1353 // Only generate a warning if the replacement is different from the original
1354 if original_text != replacement {
1355 // Generate an appropriate message based on why reflow is needed
1356 let message = match config.reflow_mode {
1357 ReflowMode::SentencePerLine => {
1358 let num_sentences = split_into_sentences(&combined_content).len();
1359 let num_lines = content_lines.len();
1360 if num_lines == 1 {
1361 // Single line with multiple sentences
1362 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1363 } else {
1364 // Multiple lines - could be split sentences or mixed
1365 format!(
1366 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1367 )
1368 }
1369 }
1370 ReflowMode::Normalize => {
1371 let combined_length = self.calculate_effective_length(&full_line);
1372 if combined_length > config.line_length.get() {
1373 format!(
1374 "Line length {} exceeds {} characters",
1375 combined_length,
1376 config.line_length.get()
1377 )
1378 } else {
1379 "Multi-line content can be normalized".to_string()
1380 }
1381 }
1382 ReflowMode::Default => {
1383 let combined_length = self.calculate_effective_length(&full_line);
1384 format!(
1385 "Line length {} exceeds {} characters",
1386 combined_length,
1387 config.line_length.get()
1388 )
1389 }
1390 };
1391
1392 warnings.push(LintWarning {
1393 rule_name: Some(self.name().to_string()),
1394 message,
1395 line: list_start + 1,
1396 column: 1,
1397 end_line: end_line + 1,
1398 end_column: lines[end_line].len() + 1,
1399 severity: Severity::Warning,
1400 fix: Some(crate::rule::Fix {
1401 range: byte_range,
1402 replacement,
1403 }),
1404 });
1405 }
1406 }
1407 continue;
1408 }
1409
1410 // Found start of a paragraph - collect all lines in it
1411 let paragraph_start = i;
1412 let mut paragraph_lines = vec![lines[i]];
1413 i += 1;
1414
1415 while i < lines.len() {
1416 let next_line = lines[i];
1417 let next_line_num = i + 1;
1418 let next_trimmed = next_line.trim();
1419
1420 // Stop at paragraph boundaries
1421 if next_trimmed.is_empty()
1422 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1423 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1424 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1425 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1426 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1427 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1428 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1429 || (next_line_num > 0
1430 && next_line_num <= ctx.lines.len()
1431 && ctx.lines[next_line_num - 1].blockquote.is_some())
1432 || next_trimmed.starts_with('#')
1433 || TableUtils::is_potential_table_row(next_line)
1434 || is_list_item(next_trimmed)
1435 || is_horizontal_rule(next_trimmed)
1436 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1437 || is_template_directive_only(next_line)
1438 || is_standalone_attr_list(next_line)
1439 || is_snippet_block_delimiter(next_line)
1440 {
1441 break;
1442 }
1443
1444 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1445 if i > 0 && has_hard_break(lines[i - 1]) {
1446 // Don't include lines after hard breaks in the same paragraph
1447 break;
1448 }
1449
1450 paragraph_lines.push(next_line);
1451 i += 1;
1452 }
1453
1454 // Combine paragraph lines into a single string for processing
1455 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1456 let paragraph_text = paragraph_lines.join(" ");
1457
1458 // Skip reflowing if this paragraph contains definition list items
1459 // Definition lists are multi-line structures that should not be joined
1460 let contains_definition_list = paragraph_lines
1461 .iter()
1462 .any(|line| crate::utils::is_definition_list_item(line));
1463
1464 if contains_definition_list {
1465 // Don't reflow definition lists - skip this paragraph
1466 i = paragraph_start + paragraph_lines.len();
1467 continue;
1468 }
1469
1470 // Skip reflowing if this paragraph contains MkDocs Snippets markers
1471 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1472 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1473
1474 if contains_snippets {
1475 // Don't reflow Snippets blocks - skip this paragraph
1476 i = paragraph_start + paragraph_lines.len();
1477 continue;
1478 }
1479
1480 // Check if this paragraph needs reflowing
1481 let needs_reflow = match config.reflow_mode {
1482 ReflowMode::Normalize => {
1483 // In normalize mode, reflow multi-line paragraphs
1484 paragraph_lines.len() > 1
1485 }
1486 ReflowMode::SentencePerLine => {
1487 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1488 // Note: we check the joined text because sentences can span multiple lines
1489 let sentences = split_into_sentences(¶graph_text);
1490
1491 // Always reflow if multiple sentences on one line
1492 if sentences.len() > 1 {
1493 true
1494 } else if paragraph_lines.len() > 1 {
1495 // For single-sentence paragraphs spanning multiple lines:
1496 // Reflow if they COULD fit on one line (respecting line-length constraint)
1497 if config.line_length.is_unlimited() {
1498 // No line-length constraint - always join single sentences
1499 true
1500 } else {
1501 // Only join if it fits within line-length
1502 let effective_length = self.calculate_effective_length(¶graph_text);
1503 effective_length <= config.line_length.get()
1504 }
1505 } else {
1506 false
1507 }
1508 }
1509 ReflowMode::Default => {
1510 // In default mode, only reflow if lines exceed limit
1511 paragraph_lines
1512 .iter()
1513 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1514 }
1515 };
1516
1517 if needs_reflow {
1518 // Calculate byte range for this paragraph
1519 // Use whole_line_range for each line and combine
1520 let start_range = line_index.whole_line_range(paragraph_start + 1);
1521 let end_line = paragraph_start + paragraph_lines.len() - 1;
1522
1523 // For the last line, we want to preserve any trailing newline
1524 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1525 // Last line without trailing newline - use line_text_range
1526 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1527 } else {
1528 // Not the last line or has trailing newline - use whole_line_range
1529 line_index.whole_line_range(end_line + 1)
1530 };
1531
1532 let byte_range = start_range.start..end_range.end;
1533
1534 // Check if the paragraph ends with a hard break and what type
1535 let hard_break_type = paragraph_lines.last().and_then(|line| {
1536 let line = line.strip_suffix('\r').unwrap_or(line);
1537 if line.ends_with('\\') {
1538 Some("\\")
1539 } else if line.ends_with(" ") {
1540 Some(" ")
1541 } else {
1542 None
1543 }
1544 });
1545
1546 // Reflow the paragraph
1547 // When line_length = 0 (no limit), use a very large value for reflow
1548 let reflow_line_length = if config.line_length.is_unlimited() {
1549 usize::MAX
1550 } else {
1551 config.line_length.get()
1552 };
1553 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1554 line_length: reflow_line_length,
1555 break_on_sentences: true,
1556 preserve_breaks: false,
1557 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1558 abbreviations: config.abbreviations_for_reflow(),
1559 };
1560 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1561
1562 // If the original paragraph ended with a hard break, preserve it
1563 // Preserve the original hard break format (backslash or two spaces)
1564 if let Some(break_marker) = hard_break_type
1565 && !reflowed.is_empty()
1566 {
1567 let last_idx = reflowed.len() - 1;
1568 if !has_hard_break(&reflowed[last_idx]) {
1569 reflowed[last_idx].push_str(break_marker);
1570 }
1571 }
1572
1573 let reflowed_text = reflowed.join("\n");
1574
1575 // Preserve trailing newline if the original paragraph had one
1576 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1577 format!("{reflowed_text}\n")
1578 } else {
1579 reflowed_text
1580 };
1581
1582 // Get the original text to compare
1583 let original_text = &ctx.content[byte_range.clone()];
1584
1585 // Only generate a warning if the replacement is different from the original
1586 if original_text != replacement {
1587 // Create warning with actual fix
1588 // In default mode, report the specific line that violates
1589 // In normalize mode, report the whole paragraph
1590 // In sentence-per-line mode, report the entire paragraph
1591 let (warning_line, warning_end_line) = match config.reflow_mode {
1592 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1593 ReflowMode::SentencePerLine => {
1594 // Highlight the entire paragraph that needs reformatting
1595 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1596 }
1597 ReflowMode::Default => {
1598 // Find the first line that exceeds the limit
1599 let mut violating_line = paragraph_start;
1600 for (idx, line) in paragraph_lines.iter().enumerate() {
1601 if self.calculate_effective_length(line) > config.line_length.get() {
1602 violating_line = paragraph_start + idx;
1603 break;
1604 }
1605 }
1606 (violating_line + 1, violating_line + 1)
1607 }
1608 };
1609
1610 warnings.push(LintWarning {
1611 rule_name: Some(self.name().to_string()),
1612 message: match config.reflow_mode {
1613 ReflowMode::Normalize => format!(
1614 "Paragraph could be normalized to use line length of {} characters",
1615 config.line_length.get()
1616 ),
1617 ReflowMode::SentencePerLine => {
1618 let num_sentences = split_into_sentences(¶graph_text).len();
1619 if paragraph_lines.len() == 1 {
1620 // Single line with multiple sentences
1621 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1622 } else {
1623 let num_lines = paragraph_lines.len();
1624 // Multiple lines - could be split sentences or mixed
1625 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1626 }
1627 },
1628 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1629 },
1630 line: warning_line,
1631 column: 1,
1632 end_line: warning_end_line,
1633 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1634 severity: Severity::Warning,
1635 fix: Some(crate::rule::Fix {
1636 range: byte_range,
1637 replacement,
1638 }),
1639 });
1640 }
1641 }
1642 }
1643
1644 warnings
1645 }
1646
1647 /// Calculate string length based on the configured length mode
1648 fn calculate_string_length(&self, s: &str) -> usize {
1649 match self.config.length_mode {
1650 LengthMode::Chars => s.chars().count(),
1651 LengthMode::Visual => s.width(),
1652 LengthMode::Bytes => s.len(),
1653 }
1654 }
1655
1656 /// Calculate effective line length excluding unbreakable URLs
1657 fn calculate_effective_length(&self, line: &str) -> usize {
1658 if self.config.strict {
1659 // In strict mode, count everything
1660 return self.calculate_string_length(line);
1661 }
1662
1663 // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1664 let bytes = line.as_bytes();
1665 if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1666 return self.calculate_string_length(line);
1667 }
1668
1669 // More precise check for URLs and links
1670 if !line.contains("http") && !line.contains('[') {
1671 return self.calculate_string_length(line);
1672 }
1673
1674 let mut effective_line = line.to_string();
1675
1676 // First handle markdown links to avoid double-counting URLs
1677 // Pattern: [text](very-long-url) -> [text](url)
1678 if line.contains('[') && line.contains("](") {
1679 for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1680 if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1681 && url.as_str().len() > 15
1682 {
1683 let replacement = format!("[{}](url)", text.as_str());
1684 effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1685 }
1686 }
1687 }
1688
1689 // Then replace bare URLs with a placeholder of reasonable length
1690 // This allows lines with long URLs to pass if the rest of the content is reasonable
1691 if effective_line.contains("http") {
1692 for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1693 let url = url_match.as_str();
1694 // Skip if this URL is already part of a markdown link we handled
1695 if !effective_line.contains(&format!("({url})")) {
1696 // Replace URL with placeholder that represents a "reasonable" URL length
1697 // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1698 let placeholder = "x".repeat(15.min(url.len()));
1699 effective_line = effective_line.replacen(url, &placeholder, 1);
1700 }
1701 }
1702 }
1703
1704 self.calculate_string_length(&effective_line)
1705 }
1706}