rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::range_utils::LineIndex;
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9 IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19 split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
/// Lint rule MD013: reports lines whose length exceeds the configured limit.
///
/// The rule carries no state other than its [`MD013Config`].
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings used by this rule instance (crate-visible).
    pub(crate) config: MD013Config,
}
32
33impl MD013LineLength {
34 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35 Self {
36 config: MD013Config {
37 line_length: crate::types::LineLength::new(line_length),
38 code_blocks,
39 tables,
40 headings,
41 paragraphs: true, // Default to true for backwards compatibility
42 strict,
43 reflow: false,
44 reflow_mode: ReflowMode::default(),
45 length_mode: LengthMode::default(),
46 abbreviations: None,
47 },
48 }
49 }
50
    /// Creates the rule from an already-built [`MD013Config`], taking ownership of it.
    pub fn from_config_struct(config: MD013Config) -> Self {
        Self { config }
    }
54
55 fn should_ignore_line(
56 &self,
57 line: &str,
58 _lines: &[&str],
59 current_line: usize,
60 ctx: &crate::lint_context::LintContext,
61 ) -> bool {
62 if self.config.strict {
63 return false;
64 }
65
66 // Quick check for common patterns before expensive regex
67 let trimmed = line.trim();
68
69 // Only skip if the entire line is a URL (quick check first)
70 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71 return true;
72 }
73
74 // Only skip if the entire line is an image reference (quick check first)
75 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76 return true;
77 }
78
79 // Only skip if the entire line is a link reference (quick check first)
80 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81 return true;
82 }
83
84 // Code blocks with long strings (only check if in code block)
85 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86 && !trimmed.is_empty()
87 && !line.contains(' ')
88 && !line.contains('\t')
89 {
90 return true;
91 }
92
93 false
94 }
95
96 /// Check if rule should skip based on provided config (used for inline config support)
97 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
98 // Skip if content is empty
99 if ctx.content.is_empty() {
100 return true;
101 }
102
103 // For sentence-per-line or normalize mode, never skip based on line length
104 if config.reflow
105 && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
106 {
107 return false;
108 }
109
110 // Quick check: if total content is shorter than line limit, definitely skip
111 if ctx.content.len() <= config.line_length.get() {
112 return true;
113 }
114
115 // Skip if no line exceeds the limit
116 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
117 }
118}
119
120impl Rule for MD013LineLength {
    /// Returns the rule identifier, `"MD013"`.
    fn name(&self) -> &'static str {
        "MD013"
    }
124
    /// Returns the one-line description of the rule.
    fn description(&self) -> &'static str {
        "Line length should not be excessive"
    }
128
129 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
130 let content = ctx.content;
131
132 // Parse inline configuration FIRST so we can use effective config for should_skip
133 let inline_config = crate::inline_config::InlineConfig::from_content(content);
134 let config_override = inline_config.get_rule_config("MD013");
135
136 // Apply configuration override if present
137 let effective_config = if let Some(json_config) = config_override {
138 if let Some(obj) = json_config.as_object() {
139 let mut config = self.config.clone();
140 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
141 config.line_length = crate::types::LineLength::new(line_length as usize);
142 }
143 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
144 config.code_blocks = code_blocks;
145 }
146 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
147 config.tables = tables;
148 }
149 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
150 config.headings = headings;
151 }
152 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
153 config.strict = strict;
154 }
155 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
156 config.reflow = reflow;
157 }
158 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
159 config.reflow_mode = match reflow_mode {
160 "default" => ReflowMode::Default,
161 "normalize" => ReflowMode::Normalize,
162 "sentence-per-line" => ReflowMode::SentencePerLine,
163 _ => ReflowMode::default(),
164 };
165 }
166 config
167 } else {
168 self.config.clone()
169 }
170 } else {
171 self.config.clone()
172 };
173
174 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
175 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
176 if self.should_skip_with_config(ctx, &effective_config)
177 && !(effective_config.reflow
178 && (effective_config.reflow_mode == ReflowMode::Normalize
179 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
180 {
181 return Ok(Vec::new());
182 }
183
184 // Direct implementation without DocumentStructure
185 let mut warnings = Vec::new();
186
187 // Special handling: line_length = 0 means "no line length limit"
188 // Skip all line length checks, but still allow reflow if enabled
189 let skip_length_checks = effective_config.line_length.is_unlimited();
190
191 // Pre-filter lines that could be problematic to avoid processing all lines
192 let mut candidate_lines = Vec::new();
193 if !skip_length_checks {
194 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
195 // Skip front matter - it should never be linted
196 if line_info.in_front_matter {
197 continue;
198 }
199
200 // Quick length check first
201 if line_info.byte_len > effective_config.line_length.get() {
202 candidate_lines.push(line_idx);
203 }
204 }
205 }
206
207 // If no candidate lines and not in normalize or sentence-per-line mode, early return
208 if candidate_lines.is_empty()
209 && !(effective_config.reflow
210 && (effective_config.reflow_mode == ReflowMode::Normalize
211 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
212 {
213 return Ok(warnings);
214 }
215
216 // Use ctx.lines if available for better performance
217 let lines: Vec<&str> = if !ctx.lines.is_empty() {
218 ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
219 } else {
220 content.lines().collect()
221 };
222
223 // Create a quick lookup set for heading lines
224 // We need this for both the heading skip check AND the paragraphs check
225 let heading_lines_set: std::collections::HashSet<usize> = ctx
226 .lines
227 .iter()
228 .enumerate()
229 .filter(|(_, line)| line.heading.is_some())
230 .map(|(idx, _)| idx + 1)
231 .collect();
232
233 // Use pre-computed table blocks from context
234 // We need this for both the table skip check AND the paragraphs check
235 let table_blocks = &ctx.table_blocks;
236 let mut table_lines_set = std::collections::HashSet::new();
237 for table in table_blocks {
238 table_lines_set.insert(table.header_line + 1);
239 table_lines_set.insert(table.delimiter_line + 1);
240 for &line in &table.content_lines {
241 table_lines_set.insert(line + 1);
242 }
243 }
244
245 // Process candidate lines for line length checks
246 for &line_idx in &candidate_lines {
247 let line_number = line_idx + 1;
248 let line = lines[line_idx];
249
250 // Calculate effective length excluding unbreakable URLs
251 let effective_length = self.calculate_effective_length(line);
252
253 // Use single line length limit for all content
254 let line_limit = effective_config.line_length.get();
255
256 // Skip short lines immediately (double-check after effective length calculation)
257 if effective_length <= line_limit {
258 continue;
259 }
260
261 // Skip mkdocstrings blocks (already handled by LintContext)
262 if ctx.lines[line_idx].in_mkdocstrings {
263 continue;
264 }
265
266 // Skip various block types efficiently
267 if !effective_config.strict {
268 // Skip setext heading underlines
269 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
270 continue;
271 }
272
273 // Skip block elements according to config flags
274 // The flags mean: true = check these elements, false = skip these elements
275 // So we skip when the flag is FALSE and the line is in that element type
276 if (!effective_config.headings && heading_lines_set.contains(&line_number))
277 || (!effective_config.code_blocks
278 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
279 || (!effective_config.tables && table_lines_set.contains(&line_number))
280 || ctx.lines[line_number - 1].blockquote.is_some()
281 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
282 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
283 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
284 {
285 continue;
286 }
287
288 // Check if this is a paragraph/regular text line
289 // If paragraphs = false, skip lines that are NOT in special blocks
290 if !effective_config.paragraphs {
291 let is_special_block = heading_lines_set.contains(&line_number)
292 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
293 || table_lines_set.contains(&line_number)
294 || ctx.lines[line_number - 1].blockquote.is_some()
295 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
296 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
297 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block);
298
299 // Skip regular paragraph text when paragraphs = false
300 if !is_special_block {
301 continue;
302 }
303 }
304
305 // Skip lines that are only a URL, image ref, or link ref
306 if self.should_ignore_line(line, &lines, line_idx, ctx) {
307 continue;
308 }
309 }
310
311 // In sentence-per-line mode, check if this is a single long sentence
312 // If so, emit a warning without a fix (user must manually rephrase)
313 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
314 let sentences = split_into_sentences(line.trim());
315 if sentences.len() == 1 {
316 // Single sentence that's too long - warn but don't auto-fix
317 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
318
319 let (start_line, start_col, end_line, end_col) =
320 calculate_excess_range(line_number, line, line_limit);
321
322 warnings.push(LintWarning {
323 rule_name: Some(self.name().to_string()),
324 message,
325 line: start_line,
326 column: start_col,
327 end_line,
328 end_column: end_col,
329 severity: Severity::Warning,
330 fix: None, // No auto-fix for long single sentences
331 });
332 continue;
333 }
334 // Multiple sentences will be handled by paragraph-based reflow
335 continue;
336 }
337
338 // Don't provide fix for individual lines when reflow is enabled
339 // Paragraph-based fixes will be handled separately
340 let fix = None;
341
342 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
343
344 // Calculate precise character range for the excess portion
345 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
346
347 warnings.push(LintWarning {
348 rule_name: Some(self.name().to_string()),
349 message,
350 line: start_line,
351 column: start_col,
352 end_line,
353 end_column: end_col,
354 severity: Severity::Warning,
355 fix,
356 });
357 }
358
359 // If reflow is enabled, generate paragraph-based fixes
360 if effective_config.reflow {
361 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
362 // Merge paragraph warnings with line warnings, removing duplicates
363 for pw in paragraph_warnings {
364 // Remove any line warnings that overlap with this paragraph
365 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
366 warnings.push(pw);
367 }
368 }
369
370 Ok(warnings)
371 }
372
373 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
374 // For CLI usage, apply fixes from warnings
375 // LSP will use the warning-based fixes directly
376 let warnings = self.check(ctx)?;
377
378 // If there are no fixes, return content unchanged
379 if !warnings.iter().any(|w| w.fix.is_some()) {
380 return Ok(ctx.content.to_string());
381 }
382
383 // Apply warning-based fixes
384 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
385 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
386 }
387
    /// Returns `self` as `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
391
    /// Returns the rule's category, `RuleCategory::Whitespace`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
395
    /// Reports whether the rule can be skipped for `ctx`, judged with the rule's own
    /// config (inline overrides are not considered here).
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        self.should_skip_with_config(ctx, &self.config)
    }
399
400 fn default_config_section(&self) -> Option<(String, toml::Value)> {
401 let default_config = MD013Config::default();
402 let json_value = serde_json::to_value(&default_config).ok()?;
403 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
404
405 if let toml::Value::Table(table) = toml_value {
406 if !table.is_empty() {
407 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
408 } else {
409 None
410 }
411 } else {
412 None
413 }
414 }
415
416 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
417 let mut aliases = std::collections::HashMap::new();
418 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
419 Some(aliases)
420 }
421
422 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
423 where
424 Self: Sized,
425 {
426 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
427 // Use global line_length if rule-specific config still has default value
428 if rule_config.line_length.get() == 80 {
429 rule_config.line_length = config.global.line_length;
430 }
431 Box::new(Self::from_config_struct(rule_config))
432 }
433}
434
435impl MD013LineLength {
436 /// Generate paragraph-based fixes
437 fn generate_paragraph_fixes(
438 &self,
439 ctx: &crate::lint_context::LintContext,
440 config: &MD013Config,
441 lines: &[&str],
442 ) -> Vec<LintWarning> {
443 let mut warnings = Vec::new();
444 let line_index = LineIndex::new(ctx.content);
445
446 let mut i = 0;
447 while i < lines.len() {
448 let line_num = i + 1;
449
450 // Skip special structures
451 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
452 info.in_code_block
453 || info.in_front_matter
454 || info.in_html_block
455 || info.in_html_comment
456 || info.in_esm_block
457 });
458
459 if should_skip_due_to_line_info
460 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
461 || lines[i].trim().starts_with('#')
462 || TableUtils::is_potential_table_row(lines[i])
463 || lines[i].trim().is_empty()
464 || is_horizontal_rule(lines[i].trim())
465 || is_template_directive_only(lines[i])
466 {
467 i += 1;
468 continue;
469 }
470
471 // Helper function to detect semantic line markers
472 let is_semantic_line = |content: &str| -> bool {
473 let trimmed = content.trim_start();
474 let semantic_markers = [
475 "NOTE:",
476 "WARNING:",
477 "IMPORTANT:",
478 "CAUTION:",
479 "TIP:",
480 "DANGER:",
481 "HINT:",
482 "INFO:",
483 ];
484 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
485 };
486
487 // Helper function to detect fence markers (opening or closing)
488 let is_fence_marker = |content: &str| -> bool {
489 let trimmed = content.trim_start();
490 trimmed.starts_with("```") || trimmed.starts_with("~~~")
491 };
492
493 // Check if this is a list item - handle it specially
494 let trimmed = lines[i].trim();
495 if is_list_item(trimmed) {
496 // Collect the entire list item including continuation lines
497 let list_start = i;
498 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
499 let marker_len = marker.len();
500
501 // Track lines and their types (content, code block, fence, nested list)
                /// Classification of one line collected for the list item being processed.
                #[derive(Clone)]
                enum LineType {
                    /// Text of the item's first line or of a regular continuation line.
                    Content(String),
                    CodeBlock(String, usize),      // content and original indent
                    NestedListItem(String, usize), // full line content and original indent
                    SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
                    /// A blank line kept because a sufficiently indented, non-blank line follows it.
                    Empty,
                }
510
511 let mut actual_indent: Option<usize> = None;
512 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
513 i += 1;
514
515 // Collect continuation lines using ctx.lines for metadata
516 while i < lines.len() {
517 let line_info = &ctx.lines[i];
518
519 // Use pre-computed is_blank from ctx
520 if line_info.is_blank {
521 // Empty line - check if next line is indented (part of list item)
522 if i + 1 < lines.len() {
523 let next_info = &ctx.lines[i + 1];
524
525 // Check if next line is indented enough to be continuation
526 if !next_info.is_blank && next_info.indent >= marker_len {
527 // This blank line is between paragraphs/blocks in the list item
528 list_item_lines.push(LineType::Empty);
529 i += 1;
530 continue;
531 }
532 }
533 // No indented line after blank, end of list item
534 break;
535 }
536
537 // Use pre-computed indent from ctx
538 let indent = line_info.indent;
539
540 // Valid continuation must be indented at least marker_len
541 if indent >= marker_len {
542 let trimmed = line_info.content(ctx.content).trim();
543
544 // Use pre-computed in_code_block from ctx
545 if line_info.in_code_block {
546 list_item_lines.push(LineType::CodeBlock(
547 line_info.content(ctx.content)[indent..].to_string(),
548 indent,
549 ));
550 i += 1;
551 continue;
552 }
553
554 // Check if this is a SIBLING list item (breaks parent)
555 // Nested lists are indented >= marker_len and are PART of the parent item
556 // Siblings are at indent < marker_len (at or before parent marker)
557 if is_list_item(trimmed) && indent < marker_len {
558 // This is a sibling item at same or higher level - end parent item
559 break;
560 }
561
562 // Check if this is a NESTED list item marker
563 // Nested lists should be processed separately UNLESS they're part of a
564 // multi-paragraph list item (indicated by a blank line before them OR
565 // it's a continuation of an already-started nested list)
566 if is_list_item(trimmed) && indent >= marker_len {
567 // Check if there was a blank line before this (multi-paragraph context)
568 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
569
570 // Check if we've already seen nested list content (another nested item)
571 let has_nested_content = list_item_lines.iter().any(|line| {
572 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
573 || matches!(line, LineType::NestedListItem(_, _))
574 });
575
576 if !has_blank_before && !has_nested_content {
577 // Single-paragraph context with no prior nested items: starts a new item
578 // End parent collection; nested list will be processed next
579 break;
580 }
581 // else: multi-paragraph context or continuation of nested list, keep collecting
582 // Mark this as a nested list item to preserve its structure
583 list_item_lines.push(LineType::NestedListItem(
584 line_info.content(ctx.content)[indent..].to_string(),
585 indent,
586 ));
587 i += 1;
588 continue;
589 }
590
591 // Normal continuation: marker_len to marker_len+3
592 if indent <= marker_len + 3 {
593 // Set actual_indent from first non-code continuation if not set
594 if actual_indent.is_none() {
595 actual_indent = Some(indent);
596 }
597
598 // Extract content (remove indentation and trailing whitespace)
599 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
600 // See: https://github.com/rvben/rumdl/issues/76
601 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
602
603 // Check if this is a fence marker (opening or closing)
604 // These should be treated as code block lines, not paragraph content
605 if is_fence_marker(&content) {
606 list_item_lines.push(LineType::CodeBlock(content, indent));
607 }
608 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
609 else if is_semantic_line(&content) {
610 list_item_lines.push(LineType::SemanticLine(content));
611 } else {
612 list_item_lines.push(LineType::Content(content));
613 }
614 i += 1;
615 } else {
616 // indent >= marker_len + 4: indented code block
617 list_item_lines.push(LineType::CodeBlock(
618 line_info.content(ctx.content)[indent..].to_string(),
619 indent,
620 ));
621 i += 1;
622 }
623 } else {
624 // Not indented enough, end of list item
625 break;
626 }
627 }
628
629 // Use detected indent or fallback to marker length
630 let indent_size = actual_indent.unwrap_or(marker_len);
631 let expected_indent = " ".repeat(indent_size);
632
633 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
                /// A run of consecutive list-item lines of the same kind, built from the
                /// collected `LineType` entries.
                #[derive(Clone)]
                enum Block {
                    /// Consecutive content lines forming one paragraph.
                    Paragraph(Vec<String>),
                    /// Code lines (fenced or indented), each kept with its indent.
                    Code {
                        lines: Vec<(String, usize)>, // (content, indent) pairs
                        has_preceding_blank: bool,   // Whether there was a blank line before this block
                    },
                    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
                    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
                    /// Lines belonging to a block-level HTML element or an HTML comment.
                    Html {
                        lines: Vec<String>,        // HTML content preserved exactly as-is
                        has_preceding_blank: bool, // Whether there was a blank line before this block
                    },
                }
648
649 // HTML tag detection helpers
650 // Block-level HTML tags that should trigger HTML block detection
651 const BLOCK_LEVEL_TAGS: &[&str] = &[
652 "div",
653 "details",
654 "summary",
655 "section",
656 "article",
657 "header",
658 "footer",
659 "nav",
660 "aside",
661 "main",
662 "table",
663 "thead",
664 "tbody",
665 "tfoot",
666 "tr",
667 "td",
668 "th",
669 "ul",
670 "ol",
671 "li",
672 "dl",
673 "dt",
674 "dd",
675 "pre",
676 "blockquote",
677 "figure",
678 "figcaption",
679 "form",
680 "fieldset",
681 "legend",
682 "hr",
683 "p",
684 "h1",
685 "h2",
686 "h3",
687 "h4",
688 "h5",
689 "h6",
690 "style",
691 "script",
692 "noscript",
693 ];
694
695 fn is_block_html_opening_tag(line: &str) -> Option<String> {
696 let trimmed = line.trim();
697
698 // Check for HTML comments
699 if trimmed.starts_with("<!--") {
700 return Some("!--".to_string());
701 }
702
703 // Check for opening tags
704 if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
705 // Extract tag name from <tagname ...> or <tagname>
706 let after_bracket = &trimmed[1..];
707 if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
708 let tag_name = after_bracket[..end].to_lowercase();
709
710 // Only treat as block if it's a known block-level tag
711 if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
712 return Some(tag_name);
713 }
714 }
715 }
716 None
717 }
718
719 fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
720 let trimmed = line.trim();
721
722 // Special handling for HTML comments
723 if tag_name == "!--" {
724 return trimmed.ends_with("-->");
725 }
726
727 // Check for closing tags: </tagname> or </tagname ...>
728 trimmed.starts_with(&format!("</{tag_name}>"))
729 || trimmed.starts_with(&format!("</{tag_name} "))
730 || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
731 }
732
733 fn is_self_closing_tag(line: &str) -> bool {
734 let trimmed = line.trim();
735 trimmed.ends_with("/>")
736 }
737
738 let mut blocks: Vec<Block> = Vec::new();
739 let mut current_paragraph: Vec<String> = Vec::new();
740 let mut current_code_block: Vec<(String, usize)> = Vec::new();
741 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
742 let mut current_html_block: Vec<String> = Vec::new();
743 let mut html_tag_stack: Vec<String> = Vec::new();
744 let mut in_code = false;
745 let mut in_nested_list = false;
746 let mut in_html_block = false;
747 let mut had_preceding_blank = false; // Track if we just saw an empty line
748 let mut code_block_has_preceding_blank = false; // Track blank before current code block
749 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
750
751 for line in &list_item_lines {
752 match line {
753 LineType::Empty => {
754 if in_code {
755 current_code_block.push((String::new(), 0));
756 } else if in_nested_list {
757 current_nested_list.push((String::new(), 0));
758 } else if in_html_block {
759 // Allow blank lines inside HTML blocks
760 current_html_block.push(String::new());
761 } else if !current_paragraph.is_empty() {
762 blocks.push(Block::Paragraph(current_paragraph.clone()));
763 current_paragraph.clear();
764 }
765 // Mark that we saw a blank line
766 had_preceding_blank = true;
767 }
768 LineType::Content(content) => {
769 // Check if we're currently in an HTML block
770 if in_html_block {
771 current_html_block.push(content.clone());
772
773 // Check if this line closes any open HTML tags
774 if let Some(last_tag) = html_tag_stack.last() {
775 if is_html_closing_tag(content, last_tag) {
776 html_tag_stack.pop();
777
778 // If stack is empty, HTML block is complete
779 if html_tag_stack.is_empty() {
780 blocks.push(Block::Html {
781 lines: current_html_block.clone(),
782 has_preceding_blank: html_block_has_preceding_blank,
783 });
784 current_html_block.clear();
785 in_html_block = false;
786 }
787 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
788 // Nested opening tag within HTML block
789 if !is_self_closing_tag(content) {
790 html_tag_stack.push(new_tag);
791 }
792 }
793 }
794 had_preceding_blank = false;
795 } else {
796 // Not in HTML block - check if this line starts one
797 if let Some(tag_name) = is_block_html_opening_tag(content) {
798 // Flush current paragraph before starting HTML block
799 if in_code {
800 blocks.push(Block::Code {
801 lines: current_code_block.clone(),
802 has_preceding_blank: code_block_has_preceding_blank,
803 });
804 current_code_block.clear();
805 in_code = false;
806 } else if in_nested_list {
807 blocks.push(Block::NestedList(current_nested_list.clone()));
808 current_nested_list.clear();
809 in_nested_list = false;
810 } else if !current_paragraph.is_empty() {
811 blocks.push(Block::Paragraph(current_paragraph.clone()));
812 current_paragraph.clear();
813 }
814
815 // Start new HTML block
816 in_html_block = true;
817 html_block_has_preceding_blank = had_preceding_blank;
818 current_html_block.push(content.clone());
819
820 // Check if it's self-closing or needs a closing tag
821 if is_self_closing_tag(content) {
822 // Self-closing tag - complete the HTML block immediately
823 blocks.push(Block::Html {
824 lines: current_html_block.clone(),
825 has_preceding_blank: html_block_has_preceding_blank,
826 });
827 current_html_block.clear();
828 in_html_block = false;
829 } else {
830 // Regular opening tag - push to stack
831 html_tag_stack.push(tag_name);
832 }
833 } else {
834 // Regular content line - add to paragraph
835 if in_code {
836 // Switching from code to content
837 blocks.push(Block::Code {
838 lines: current_code_block.clone(),
839 has_preceding_blank: code_block_has_preceding_blank,
840 });
841 current_code_block.clear();
842 in_code = false;
843 } else if in_nested_list {
844 // Switching from nested list to content
845 blocks.push(Block::NestedList(current_nested_list.clone()));
846 current_nested_list.clear();
847 in_nested_list = false;
848 }
849 current_paragraph.push(content.clone());
850 }
851 had_preceding_blank = false; // Reset after content
852 }
853 }
854 LineType::CodeBlock(content, indent) => {
855 if in_nested_list {
856 // Switching from nested list to code
857 blocks.push(Block::NestedList(current_nested_list.clone()));
858 current_nested_list.clear();
859 in_nested_list = false;
860 } else if in_html_block {
861 // Switching from HTML block to code (shouldn't happen normally, but handle it)
862 blocks.push(Block::Html {
863 lines: current_html_block.clone(),
864 has_preceding_blank: html_block_has_preceding_blank,
865 });
866 current_html_block.clear();
867 html_tag_stack.clear();
868 in_html_block = false;
869 }
870 if !in_code {
871 // Switching from content to code
872 if !current_paragraph.is_empty() {
873 blocks.push(Block::Paragraph(current_paragraph.clone()));
874 current_paragraph.clear();
875 }
876 in_code = true;
877 // Record whether there was a blank line before this code block
878 code_block_has_preceding_blank = had_preceding_blank;
879 }
880 current_code_block.push((content.clone(), *indent));
881 had_preceding_blank = false; // Reset after code
882 }
883 LineType::NestedListItem(content, indent) => {
884 if in_code {
885 // Switching from code to nested list
886 blocks.push(Block::Code {
887 lines: current_code_block.clone(),
888 has_preceding_blank: code_block_has_preceding_blank,
889 });
890 current_code_block.clear();
891 in_code = false;
892 } else if in_html_block {
893 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
894 blocks.push(Block::Html {
895 lines: current_html_block.clone(),
896 has_preceding_blank: html_block_has_preceding_blank,
897 });
898 current_html_block.clear();
899 html_tag_stack.clear();
900 in_html_block = false;
901 }
902 if !in_nested_list {
903 // Switching from content to nested list
904 if !current_paragraph.is_empty() {
905 blocks.push(Block::Paragraph(current_paragraph.clone()));
906 current_paragraph.clear();
907 }
908 in_nested_list = true;
909 }
910 current_nested_list.push((content.clone(), *indent));
911 had_preceding_blank = false; // Reset after nested list
912 }
913 LineType::SemanticLine(content) => {
914 // Semantic lines are standalone - flush any current block and add as separate block
915 if in_code {
916 blocks.push(Block::Code {
917 lines: current_code_block.clone(),
918 has_preceding_blank: code_block_has_preceding_blank,
919 });
920 current_code_block.clear();
921 in_code = false;
922 } else if in_nested_list {
923 blocks.push(Block::NestedList(current_nested_list.clone()));
924 current_nested_list.clear();
925 in_nested_list = false;
926 } else if in_html_block {
927 blocks.push(Block::Html {
928 lines: current_html_block.clone(),
929 has_preceding_blank: html_block_has_preceding_blank,
930 });
931 current_html_block.clear();
932 html_tag_stack.clear();
933 in_html_block = false;
934 } else if !current_paragraph.is_empty() {
935 blocks.push(Block::Paragraph(current_paragraph.clone()));
936 current_paragraph.clear();
937 }
938 // Add semantic line as its own block
939 blocks.push(Block::SemanticLine(content.clone()));
940 had_preceding_blank = false; // Reset after semantic line
941 }
942 }
943 }
944
945 // Push remaining block
946 if in_code && !current_code_block.is_empty() {
947 blocks.push(Block::Code {
948 lines: current_code_block,
949 has_preceding_blank: code_block_has_preceding_blank,
950 });
951 } else if in_nested_list && !current_nested_list.is_empty() {
952 blocks.push(Block::NestedList(current_nested_list));
953 } else if in_html_block && !current_html_block.is_empty() {
954 // If we still have an unclosed HTML block, push it anyway
955 // (malformed HTML - missing closing tag)
956 blocks.push(Block::Html {
957 lines: current_html_block,
958 has_preceding_blank: html_block_has_preceding_blank,
959 });
960 } else if !current_paragraph.is_empty() {
961 blocks.push(Block::Paragraph(current_paragraph));
962 }
963
964 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
965 let content_lines: Vec<String> = list_item_lines
966 .iter()
967 .filter_map(|line| {
968 if let LineType::Content(s) = line {
969 Some(s.clone())
970 } else {
971 None
972 }
973 })
974 .collect();
975
976 // Check if we need to reflow this list item
977 // We check the combined content to see if it exceeds length limits
978 let combined_content = content_lines.join(" ").trim().to_string();
979 let full_line = format!("{marker}{combined_content}");
980
981 // Helper to check if we should reflow in normalize mode
982 let should_normalize = || {
983 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
984 // DO normalize if it has plain text content that spans multiple lines
985 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
986 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
987 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
988 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
989
990 // If we have nested lists, code blocks, or semantic lines but no paragraphs, don't normalize
991 if (has_nested_lists || has_code_blocks || has_semantic_lines) && !has_paragraphs {
992 return false;
993 }
994
995 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
996 if has_paragraphs {
997 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
998 if paragraph_count > 1 {
999 // Multiple paragraph blocks should be normalized
1000 return true;
1001 }
1002
1003 // Single paragraph block: normalize if it has multiple content lines
1004 if content_lines.len() > 1 {
1005 return true;
1006 }
1007 }
1008
1009 false
1010 };
1011
1012 let needs_reflow = match config.reflow_mode {
1013 ReflowMode::Normalize => {
1014 // Only reflow if:
1015 // 1. The combined line would exceed the limit, OR
1016 // 2. The list item should be normalized (has multi-line plain text)
1017 let combined_length = self.calculate_effective_length(&full_line);
1018 if combined_length > config.line_length.get() {
1019 true
1020 } else {
1021 should_normalize()
1022 }
1023 }
1024 ReflowMode::SentencePerLine => {
1025 // Check if list item has multiple sentences
1026 let sentences = split_into_sentences(&combined_content);
1027 sentences.len() > 1
1028 }
1029 ReflowMode::Default => {
1030 // In default mode, only reflow if any individual line exceeds limit
1031 // Check the original lines, not the combined content
1032 (list_start..i)
1033 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1034 }
1035 };
1036
1037 if needs_reflow {
1038 let start_range = line_index.whole_line_range(list_start + 1);
1039 let end_line = i - 1;
1040 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1041 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1042 } else {
1043 line_index.whole_line_range(end_line + 1)
1044 };
1045 let byte_range = start_range.start..end_range.end;
1046
1047 // Reflow each block (paragraphs only, preserve code blocks)
1048 // When line_length = 0 (no limit), use a very large value for reflow
1049 let reflow_line_length = if config.line_length.is_unlimited() {
1050 usize::MAX
1051 } else {
1052 config.line_length.get().saturating_sub(indent_size).max(1)
1053 };
1054 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1055 line_length: reflow_line_length,
1056 break_on_sentences: true,
1057 preserve_breaks: false,
1058 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1059 abbreviations: config.abbreviations.clone(),
1060 };
1061
1062 let mut result: Vec<String> = Vec::new();
1063 let mut is_first_block = true;
1064
1065 for (block_idx, block) in blocks.iter().enumerate() {
1066 match block {
1067 Block::Paragraph(para_lines) => {
1068 // Split the paragraph into segments at hard break boundaries
1069 // Each segment can be reflowed independently
1070 let segments = split_into_segments(para_lines);
1071
1072 for (segment_idx, segment) in segments.iter().enumerate() {
1073 // Check if this segment ends with a hard break and what type
1074 let hard_break_type = segment.last().and_then(|line| {
1075 let line = line.strip_suffix('\r').unwrap_or(line);
1076 if line.ends_with('\\') {
1077 Some("\\")
1078 } else if line.ends_with(" ") {
1079 Some(" ")
1080 } else {
1081 None
1082 }
1083 });
1084
1085 // Join and reflow the segment (removing the hard break marker for processing)
1086 let segment_for_reflow: Vec<String> = segment
1087 .iter()
1088 .map(|line| {
1089 // Strip hard break marker (2 spaces or backslash) for reflow processing
1090 if line.ends_with('\\') {
1091 line[..line.len() - 1].trim_end().to_string()
1092 } else if line.ends_with(" ") {
1093 line[..line.len() - 2].trim_end().to_string()
1094 } else {
1095 line.clone()
1096 }
1097 })
1098 .collect();
1099
1100 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1101 if !segment_text.is_empty() {
1102 let reflowed =
1103 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1104
1105 if is_first_block && segment_idx == 0 {
1106 // First segment of first block starts with marker
1107 result.push(format!("{marker}{}", reflowed[0]));
1108 for line in reflowed.iter().skip(1) {
1109 result.push(format!("{expected_indent}{line}"));
1110 }
1111 is_first_block = false;
1112 } else {
1113 // Subsequent segments
1114 for line in reflowed {
1115 result.push(format!("{expected_indent}{line}"));
1116 }
1117 }
1118
1119 // If this segment had a hard break, add it back to the last line
1120 // Preserve the original hard break format (backslash or two spaces)
1121 if let Some(break_marker) = hard_break_type
1122 && let Some(last_line) = result.last_mut()
1123 {
1124 last_line.push_str(break_marker);
1125 }
1126 }
1127 }
1128
1129 // Add blank line after paragraph block if there's a next block
1130 // BUT: check if next block is a code block that doesn't want a preceding blank
1131 if block_idx < blocks.len() - 1 {
1132 let next_block = &blocks[block_idx + 1];
1133 let should_add_blank = match next_block {
1134 Block::Code {
1135 has_preceding_blank, ..
1136 } => *has_preceding_blank,
1137 _ => true, // For all other blocks, add blank line
1138 };
1139 if should_add_blank {
1140 result.push(String::new());
1141 }
1142 }
1143 }
1144 Block::Code {
1145 lines: code_lines,
1146 has_preceding_blank: _,
1147 } => {
1148 // Preserve code blocks as-is with original indentation
1149 // NOTE: Blank line before code block is handled by the previous block
1150 // (see paragraph block's logic above)
1151
1152 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1153 if is_first_block && idx == 0 {
1154 // First line of first block gets marker
1155 result.push(format!(
1156 "{marker}{}",
1157 " ".repeat(orig_indent - marker_len) + content
1158 ));
1159 is_first_block = false;
1160 } else if content.is_empty() {
1161 result.push(String::new());
1162 } else {
1163 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1164 }
1165 }
1166 }
1167 Block::NestedList(nested_items) => {
1168 // Preserve nested list items as-is with original indentation
1169 if !is_first_block {
1170 result.push(String::new());
1171 }
1172
1173 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1174 if is_first_block && idx == 0 {
1175 // First line of first block gets marker
1176 result.push(format!(
1177 "{marker}{}",
1178 " ".repeat(orig_indent - marker_len) + content
1179 ));
1180 is_first_block = false;
1181 } else if content.is_empty() {
1182 result.push(String::new());
1183 } else {
1184 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1185 }
1186 }
1187
1188 // Add blank line after nested list if there's a next block
1189 // Check if next block is a code block that doesn't want a preceding blank
1190 if block_idx < blocks.len() - 1 {
1191 let next_block = &blocks[block_idx + 1];
1192 let should_add_blank = match next_block {
1193 Block::Code {
1194 has_preceding_blank, ..
1195 } => *has_preceding_blank,
1196 _ => true, // For all other blocks, add blank line
1197 };
1198 if should_add_blank {
1199 result.push(String::new());
1200 }
1201 }
1202 }
1203 Block::SemanticLine(content) => {
1204 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1205 // Add blank line before if not first block
1206 if !is_first_block {
1207 result.push(String::new());
1208 }
1209
1210 if is_first_block {
1211 // First block starts with marker
1212 result.push(format!("{marker}{content}"));
1213 is_first_block = false;
1214 } else {
1215 // Subsequent blocks use expected indent
1216 result.push(format!("{expected_indent}{content}"));
1217 }
1218
1219 // Add blank line after semantic line if there's a next block
1220 // Check if next block is a code block that doesn't want a preceding blank
1221 if block_idx < blocks.len() - 1 {
1222 let next_block = &blocks[block_idx + 1];
1223 let should_add_blank = match next_block {
1224 Block::Code {
1225 has_preceding_blank, ..
1226 } => *has_preceding_blank,
1227 _ => true, // For all other blocks, add blank line
1228 };
1229 if should_add_blank {
1230 result.push(String::new());
1231 }
1232 }
1233 }
1234 Block::Html {
1235 lines: html_lines,
1236 has_preceding_blank: _,
1237 } => {
1238 // Preserve HTML blocks exactly as-is with original indentation
1239 // NOTE: Blank line before HTML block is handled by the previous block
1240
1241 for (idx, line) in html_lines.iter().enumerate() {
1242 if is_first_block && idx == 0 {
1243 // First line of first block gets marker
1244 result.push(format!("{marker}{line}"));
1245 is_first_block = false;
1246 } else if line.is_empty() {
1247 // Preserve blank lines inside HTML blocks
1248 result.push(String::new());
1249 } else {
1250 // Preserve lines with their original content (already includes indentation)
1251 result.push(format!("{expected_indent}{line}"));
1252 }
1253 }
1254
1255 // Add blank line after HTML block if there's a next block
1256 if block_idx < blocks.len() - 1 {
1257 let next_block = &blocks[block_idx + 1];
1258 let should_add_blank = match next_block {
1259 Block::Code {
1260 has_preceding_blank, ..
1261 } => *has_preceding_blank,
1262 Block::Html {
1263 has_preceding_blank, ..
1264 } => *has_preceding_blank,
1265 _ => true, // For all other blocks, add blank line
1266 };
1267 if should_add_blank {
1268 result.push(String::new());
1269 }
1270 }
1271 }
1272 }
1273 }
1274
1275 let reflowed_text = result.join("\n");
1276
1277 // Preserve trailing newline
1278 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1279 format!("{reflowed_text}\n")
1280 } else {
1281 reflowed_text
1282 };
1283
1284 // Get the original text to compare
1285 let original_text = &ctx.content[byte_range.clone()];
1286
1287 // Only generate a warning if the replacement is different from the original
1288 if original_text != replacement {
1289 // Generate an appropriate message based on why reflow is needed
1290 let message = match config.reflow_mode {
1291 ReflowMode::SentencePerLine => {
1292 let num_sentences = split_into_sentences(&combined_content).len();
1293 let num_lines = content_lines.len();
1294 if num_lines == 1 {
1295 // Single line with multiple sentences
1296 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1297 } else {
1298 // Multiple lines - could be split sentences or mixed
1299 format!(
1300 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1301 )
1302 }
1303 }
1304 ReflowMode::Normalize => {
1305 let combined_length = self.calculate_effective_length(&full_line);
1306 if combined_length > config.line_length.get() {
1307 format!(
1308 "Line length {} exceeds {} characters",
1309 combined_length,
1310 config.line_length.get()
1311 )
1312 } else {
1313 "Multi-line content can be normalized".to_string()
1314 }
1315 }
1316 ReflowMode::Default => {
1317 let combined_length = self.calculate_effective_length(&full_line);
1318 format!(
1319 "Line length {} exceeds {} characters",
1320 combined_length,
1321 config.line_length.get()
1322 )
1323 }
1324 };
1325
1326 warnings.push(LintWarning {
1327 rule_name: Some(self.name().to_string()),
1328 message,
1329 line: list_start + 1,
1330 column: 1,
1331 end_line: end_line + 1,
1332 end_column: lines[end_line].len() + 1,
1333 severity: Severity::Warning,
1334 fix: Some(crate::rule::Fix {
1335 range: byte_range,
1336 replacement,
1337 }),
1338 });
1339 }
1340 }
1341 continue;
1342 }
1343
1344 // Found start of a paragraph - collect all lines in it
1345 let paragraph_start = i;
1346 let mut paragraph_lines = vec![lines[i]];
1347 i += 1;
1348
1349 while i < lines.len() {
1350 let next_line = lines[i];
1351 let next_line_num = i + 1;
1352 let next_trimmed = next_line.trim();
1353
1354 // Stop at paragraph boundaries
1355 if next_trimmed.is_empty()
1356 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1357 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1358 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1359 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1360 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1361 || (next_line_num > 0
1362 && next_line_num <= ctx.lines.len()
1363 && ctx.lines[next_line_num - 1].blockquote.is_some())
1364 || next_trimmed.starts_with('#')
1365 || TableUtils::is_potential_table_row(next_line)
1366 || is_list_item(next_trimmed)
1367 || is_horizontal_rule(next_trimmed)
1368 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1369 || is_template_directive_only(next_line)
1370 {
1371 break;
1372 }
1373
1374 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1375 if i > 0 && has_hard_break(lines[i - 1]) {
1376 // Don't include lines after hard breaks in the same paragraph
1377 break;
1378 }
1379
1380 paragraph_lines.push(next_line);
1381 i += 1;
1382 }
1383
1384 // Combine paragraph lines into a single string for processing
1385 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1386 let paragraph_text = paragraph_lines.join(" ");
1387
1388 // Skip reflowing if this paragraph contains definition list items
1389 // Definition lists are multi-line structures that should not be joined
1390 let contains_definition_list = paragraph_lines
1391 .iter()
1392 .any(|line| crate::utils::is_definition_list_item(line));
1393
1394 if contains_definition_list {
1395 // Don't reflow definition lists - skip this paragraph
1396 i = paragraph_start + paragraph_lines.len();
1397 continue;
1398 }
1399
1400 // Check if this paragraph needs reflowing
1401 let needs_reflow = match config.reflow_mode {
1402 ReflowMode::Normalize => {
1403 // In normalize mode, reflow multi-line paragraphs
1404 paragraph_lines.len() > 1
1405 }
1406 ReflowMode::SentencePerLine => {
1407 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1408 // Note: we check the joined text because sentences can span multiple lines
1409 let sentences = split_into_sentences(¶graph_text);
1410
1411 // Always reflow if multiple sentences on one line
1412 if sentences.len() > 1 {
1413 true
1414 } else if paragraph_lines.len() > 1 {
1415 // For single-sentence paragraphs spanning multiple lines:
1416 // Reflow if they COULD fit on one line (respecting line-length constraint)
1417 if config.line_length.is_unlimited() {
1418 // No line-length constraint - always join single sentences
1419 true
1420 } else {
1421 // Only join if it fits within line-length
1422 let effective_length = self.calculate_effective_length(¶graph_text);
1423 effective_length <= config.line_length.get()
1424 }
1425 } else {
1426 false
1427 }
1428 }
1429 ReflowMode::Default => {
1430 // In default mode, only reflow if lines exceed limit
1431 paragraph_lines
1432 .iter()
1433 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1434 }
1435 };
1436
1437 if needs_reflow {
1438 // Calculate byte range for this paragraph
1439 // Use whole_line_range for each line and combine
1440 let start_range = line_index.whole_line_range(paragraph_start + 1);
1441 let end_line = paragraph_start + paragraph_lines.len() - 1;
1442
1443 // For the last line, we want to preserve any trailing newline
1444 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1445 // Last line without trailing newline - use line_text_range
1446 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1447 } else {
1448 // Not the last line or has trailing newline - use whole_line_range
1449 line_index.whole_line_range(end_line + 1)
1450 };
1451
1452 let byte_range = start_range.start..end_range.end;
1453
1454 // Check if the paragraph ends with a hard break and what type
1455 let hard_break_type = paragraph_lines.last().and_then(|line| {
1456 let line = line.strip_suffix('\r').unwrap_or(line);
1457 if line.ends_with('\\') {
1458 Some("\\")
1459 } else if line.ends_with(" ") {
1460 Some(" ")
1461 } else {
1462 None
1463 }
1464 });
1465
1466 // Reflow the paragraph
1467 // When line_length = 0 (no limit), use a very large value for reflow
1468 let reflow_line_length = if config.line_length.is_unlimited() {
1469 usize::MAX
1470 } else {
1471 config.line_length.get()
1472 };
1473 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1474 line_length: reflow_line_length,
1475 break_on_sentences: true,
1476 preserve_breaks: false,
1477 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1478 abbreviations: config.abbreviations.clone(),
1479 };
1480 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1481
1482 // If the original paragraph ended with a hard break, preserve it
1483 // Preserve the original hard break format (backslash or two spaces)
1484 if let Some(break_marker) = hard_break_type
1485 && !reflowed.is_empty()
1486 {
1487 let last_idx = reflowed.len() - 1;
1488 if !has_hard_break(&reflowed[last_idx]) {
1489 reflowed[last_idx].push_str(break_marker);
1490 }
1491 }
1492
1493 let reflowed_text = reflowed.join("\n");
1494
1495 // Preserve trailing newline if the original paragraph had one
1496 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1497 format!("{reflowed_text}\n")
1498 } else {
1499 reflowed_text
1500 };
1501
1502 // Get the original text to compare
1503 let original_text = &ctx.content[byte_range.clone()];
1504
1505 // Only generate a warning if the replacement is different from the original
1506 if original_text != replacement {
1507 // Create warning with actual fix
1508 // In default mode, report the specific line that violates
1509 // In normalize mode, report the whole paragraph
1510 // In sentence-per-line mode, report the entire paragraph
1511 let (warning_line, warning_end_line) = match config.reflow_mode {
1512 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1513 ReflowMode::SentencePerLine => {
1514 // Highlight the entire paragraph that needs reformatting
1515 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1516 }
1517 ReflowMode::Default => {
1518 // Find the first line that exceeds the limit
1519 let mut violating_line = paragraph_start;
1520 for (idx, line) in paragraph_lines.iter().enumerate() {
1521 if self.calculate_effective_length(line) > config.line_length.get() {
1522 violating_line = paragraph_start + idx;
1523 break;
1524 }
1525 }
1526 (violating_line + 1, violating_line + 1)
1527 }
1528 };
1529
1530 warnings.push(LintWarning {
1531 rule_name: Some(self.name().to_string()),
1532 message: match config.reflow_mode {
1533 ReflowMode::Normalize => format!(
1534 "Paragraph could be normalized to use line length of {} characters",
1535 config.line_length.get()
1536 ),
1537 ReflowMode::SentencePerLine => {
1538 let num_sentences = split_into_sentences(¶graph_text).len();
1539 if paragraph_lines.len() == 1 {
1540 // Single line with multiple sentences
1541 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1542 } else {
1543 let num_lines = paragraph_lines.len();
1544 // Multiple lines - could be split sentences or mixed
1545 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1546 }
1547 },
1548 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1549 },
1550 line: warning_line,
1551 column: 1,
1552 end_line: warning_end_line,
1553 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1554 severity: Severity::Warning,
1555 fix: Some(crate::rule::Fix {
1556 range: byte_range,
1557 replacement,
1558 }),
1559 });
1560 }
1561 }
1562 }
1563
1564 warnings
1565 }
1566
1567 /// Calculate string length based on the configured length mode
1568 fn calculate_string_length(&self, s: &str) -> usize {
1569 match self.config.length_mode {
1570 LengthMode::Chars => s.chars().count(),
1571 LengthMode::Visual => s.width(),
1572 LengthMode::Bytes => s.len(),
1573 }
1574 }
1575
1576 /// Calculate effective line length excluding unbreakable URLs
1577 fn calculate_effective_length(&self, line: &str) -> usize {
1578 if self.config.strict {
1579 // In strict mode, count everything
1580 return self.calculate_string_length(line);
1581 }
1582
1583 // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1584 let bytes = line.as_bytes();
1585 if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1586 return self.calculate_string_length(line);
1587 }
1588
1589 // More precise check for URLs and links
1590 if !line.contains("http") && !line.contains('[') {
1591 return self.calculate_string_length(line);
1592 }
1593
1594 let mut effective_line = line.to_string();
1595
1596 // First handle markdown links to avoid double-counting URLs
1597 // Pattern: [text](very-long-url) -> [text](url)
1598 if line.contains('[') && line.contains("](") {
1599 for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1600 if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1601 && url.as_str().len() > 15
1602 {
1603 let replacement = format!("[{}](url)", text.as_str());
1604 effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1605 }
1606 }
1607 }
1608
1609 // Then replace bare URLs with a placeholder of reasonable length
1610 // This allows lines with long URLs to pass if the rest of the content is reasonable
1611 if effective_line.contains("http") {
1612 for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1613 let url = url_match.as_str();
1614 // Skip if this URL is already part of a markdown link we handled
1615 if !effective_line.contains(&format!("({url})")) {
1616 // Replace URL with placeholder that represents a "reasonable" URL length
1617 // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1618 let placeholder = "x".repeat(15.min(url.len()));
1619 effective_line = effective_line.replacen(url, &placeholder, 1);
1620 }
1621 }
1622 }
1623
1624 self.calculate_string_length(&effective_line)
1625 }
1626}