rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::range_utils::LineIndex;
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9 IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19 split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
/// Lint rule MD013 ("Line length should not be excessive").
///
/// The rule itself is stateless; all behaviour is driven by the embedded
/// [`MD013Config`].
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings for this rule instance: the length limit, the per-element
    /// toggles (code blocks, tables, headings, paragraphs), strict mode and
    /// the reflow options.
    pub(crate) config: MD013Config,
}
32
33impl MD013LineLength {
34 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35 Self {
36 config: MD013Config {
37 line_length: crate::types::LineLength::new(line_length),
38 code_blocks,
39 tables,
40 headings,
41 paragraphs: true, // Default to true for backwards compatibility
42 strict,
43 reflow: false,
44 reflow_mode: ReflowMode::default(),
45 length_mode: LengthMode::default(),
46 abbreviations: None,
47 },
48 }
49 }
50
51 pub fn from_config_struct(config: MD013Config) -> Self {
52 Self { config }
53 }
54
55 fn should_ignore_line(
56 &self,
57 line: &str,
58 _lines: &[&str],
59 current_line: usize,
60 ctx: &crate::lint_context::LintContext,
61 ) -> bool {
62 if self.config.strict {
63 return false;
64 }
65
66 // Quick check for common patterns before expensive regex
67 let trimmed = line.trim();
68
69 // Only skip if the entire line is a URL (quick check first)
70 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71 return true;
72 }
73
74 // Only skip if the entire line is an image reference (quick check first)
75 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76 return true;
77 }
78
79 // Only skip if the entire line is a link reference (quick check first)
80 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81 return true;
82 }
83
84 // Code blocks with long strings (only check if in code block)
85 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86 && !trimmed.is_empty()
87 && !line.contains(' ')
88 && !line.contains('\t')
89 {
90 return true;
91 }
92
93 false
94 }
95}
96
97impl Rule for MD013LineLength {
98 fn name(&self) -> &'static str {
99 "MD013"
100 }
101
102 fn description(&self) -> &'static str {
103 "Line length should not be excessive"
104 }
105
106 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
107 let content = ctx.content;
108
109 // Fast early return using should_skip
110 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
111 if self.should_skip(ctx)
112 && !(self.config.reflow
113 && (self.config.reflow_mode == ReflowMode::Normalize
114 || self.config.reflow_mode == ReflowMode::SentencePerLine))
115 {
116 return Ok(Vec::new());
117 }
118
119 // Direct implementation without DocumentStructure
120 let mut warnings = Vec::new();
121
122 // Check for inline configuration overrides
123 let inline_config = crate::inline_config::InlineConfig::from_content(content);
124 let config_override = inline_config.get_rule_config("MD013");
125
126 // Apply configuration override if present
127 let effective_config = if let Some(json_config) = config_override {
128 if let Some(obj) = json_config.as_object() {
129 let mut config = self.config.clone();
130 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
131 config.line_length = crate::types::LineLength::new(line_length as usize);
132 }
133 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
134 config.code_blocks = code_blocks;
135 }
136 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
137 config.tables = tables;
138 }
139 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
140 config.headings = headings;
141 }
142 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
143 config.strict = strict;
144 }
145 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
146 config.reflow = reflow;
147 }
148 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
149 config.reflow_mode = match reflow_mode {
150 "default" => ReflowMode::Default,
151 "normalize" => ReflowMode::Normalize,
152 "sentence-per-line" => ReflowMode::SentencePerLine,
153 _ => ReflowMode::default(),
154 };
155 }
156 config
157 } else {
158 self.config.clone()
159 }
160 } else {
161 self.config.clone()
162 };
163
164 // Special handling: line_length = 0 means "no line length limit"
165 // Skip all line length checks, but still allow reflow if enabled
166 let skip_length_checks = effective_config.line_length.is_unlimited();
167
168 // Pre-filter lines that could be problematic to avoid processing all lines
169 let mut candidate_lines = Vec::new();
170 if !skip_length_checks {
171 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
172 // Skip front matter - it should never be linted
173 if line_info.in_front_matter {
174 continue;
175 }
176
177 // Quick length check first
178 if line_info.byte_len > effective_config.line_length.get() {
179 candidate_lines.push(line_idx);
180 }
181 }
182 }
183
184 // If no candidate lines and not in normalize or sentence-per-line mode, early return
185 if candidate_lines.is_empty()
186 && !(effective_config.reflow
187 && (effective_config.reflow_mode == ReflowMode::Normalize
188 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
189 {
190 return Ok(warnings);
191 }
192
193 // Use ctx.lines if available for better performance
194 let lines: Vec<&str> = if !ctx.lines.is_empty() {
195 ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
196 } else {
197 content.lines().collect()
198 };
199
200 // Create a quick lookup set for heading lines
201 // We need this for both the heading skip check AND the paragraphs check
202 let heading_lines_set: std::collections::HashSet<usize> = ctx
203 .lines
204 .iter()
205 .enumerate()
206 .filter(|(_, line)| line.heading.is_some())
207 .map(|(idx, _)| idx + 1)
208 .collect();
209
210 // Use pre-computed table blocks from context
211 // We need this for both the table skip check AND the paragraphs check
212 let table_blocks = &ctx.table_blocks;
213 let mut table_lines_set = std::collections::HashSet::new();
214 for table in table_blocks {
215 table_lines_set.insert(table.header_line + 1);
216 table_lines_set.insert(table.delimiter_line + 1);
217 for &line in &table.content_lines {
218 table_lines_set.insert(line + 1);
219 }
220 }
221
222 // Process candidate lines for line length checks
223 for &line_idx in &candidate_lines {
224 let line_number = line_idx + 1;
225 let line = lines[line_idx];
226
227 // Calculate effective length excluding unbreakable URLs
228 let effective_length = self.calculate_effective_length(line);
229
230 // Use single line length limit for all content
231 let line_limit = effective_config.line_length.get();
232
233 // Skip short lines immediately (double-check after effective length calculation)
234 if effective_length <= line_limit {
235 continue;
236 }
237
238 // Skip mkdocstrings blocks (already handled by LintContext)
239 if ctx.lines[line_idx].in_mkdocstrings {
240 continue;
241 }
242
243 // Skip various block types efficiently
244 if !effective_config.strict {
245 // Skip setext heading underlines
246 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
247 continue;
248 }
249
250 // Skip block elements according to config flags
251 // The flags mean: true = check these elements, false = skip these elements
252 // So we skip when the flag is FALSE and the line is in that element type
253 if (!effective_config.headings && heading_lines_set.contains(&line_number))
254 || (!effective_config.code_blocks
255 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
256 || (!effective_config.tables && table_lines_set.contains(&line_number))
257 || ctx.lines[line_number - 1].blockquote.is_some()
258 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
259 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
260 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
261 {
262 continue;
263 }
264
265 // Check if this is a paragraph/regular text line
266 // If paragraphs = false, skip lines that are NOT in special blocks
267 if !effective_config.paragraphs {
268 let is_special_block = heading_lines_set.contains(&line_number)
269 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
270 || table_lines_set.contains(&line_number)
271 || ctx.lines[line_number - 1].blockquote.is_some()
272 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
273 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
274 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block);
275
276 // Skip regular paragraph text when paragraphs = false
277 if !is_special_block {
278 continue;
279 }
280 }
281
282 // Skip lines that are only a URL, image ref, or link ref
283 if self.should_ignore_line(line, &lines, line_idx, ctx) {
284 continue;
285 }
286 }
287
288 // In sentence-per-line mode, check if this is a single long sentence
289 // If so, emit a warning without a fix (user must manually rephrase)
290 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
291 let sentences = split_into_sentences(line.trim());
292 if sentences.len() == 1 {
293 // Single sentence that's too long - warn but don't auto-fix
294 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
295
296 let (start_line, start_col, end_line, end_col) =
297 calculate_excess_range(line_number, line, line_limit);
298
299 warnings.push(LintWarning {
300 rule_name: Some(self.name().to_string()),
301 message,
302 line: start_line,
303 column: start_col,
304 end_line,
305 end_column: end_col,
306 severity: Severity::Warning,
307 fix: None, // No auto-fix for long single sentences
308 });
309 continue;
310 }
311 // Multiple sentences will be handled by paragraph-based reflow
312 continue;
313 }
314
315 // Don't provide fix for individual lines when reflow is enabled
316 // Paragraph-based fixes will be handled separately
317 let fix = None;
318
319 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
320
321 // Calculate precise character range for the excess portion
322 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
323
324 warnings.push(LintWarning {
325 rule_name: Some(self.name().to_string()),
326 message,
327 line: start_line,
328 column: start_col,
329 end_line,
330 end_column: end_col,
331 severity: Severity::Warning,
332 fix,
333 });
334 }
335
336 // If reflow is enabled, generate paragraph-based fixes
337 if effective_config.reflow {
338 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
339 // Merge paragraph warnings with line warnings, removing duplicates
340 for pw in paragraph_warnings {
341 // Remove any line warnings that overlap with this paragraph
342 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
343 warnings.push(pw);
344 }
345 }
346
347 Ok(warnings)
348 }
349
350 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
351 // For CLI usage, apply fixes from warnings
352 // LSP will use the warning-based fixes directly
353 let warnings = self.check(ctx)?;
354
355 // If there are no fixes, return content unchanged
356 if !warnings.iter().any(|w| w.fix.is_some()) {
357 return Ok(ctx.content.to_string());
358 }
359
360 // Apply warning-based fixes
361 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
362 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
363 }
364
365 fn as_any(&self) -> &dyn std::any::Any {
366 self
367 }
368
369 fn category(&self) -> RuleCategory {
370 RuleCategory::Whitespace
371 }
372
373 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
374 // Skip if content is empty
375 if ctx.content.is_empty() {
376 return true;
377 }
378
379 // For sentence-per-line or normalize mode, never skip based on line length
380 if self.config.reflow
381 && (self.config.reflow_mode == ReflowMode::SentencePerLine
382 || self.config.reflow_mode == ReflowMode::Normalize)
383 {
384 return false;
385 }
386
387 // Quick check: if total content is shorter than line limit, definitely skip
388 if ctx.content.len() <= self.config.line_length.get() {
389 return true;
390 }
391
392 // Use more efficient check - any() with early termination instead of all()
393 !ctx.lines
394 .iter()
395 .any(|line| line.byte_len > self.config.line_length.get())
396 }
397
398 fn default_config_section(&self) -> Option<(String, toml::Value)> {
399 let default_config = MD013Config::default();
400 let json_value = serde_json::to_value(&default_config).ok()?;
401 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
402
403 if let toml::Value::Table(table) = toml_value {
404 if !table.is_empty() {
405 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
406 } else {
407 None
408 }
409 } else {
410 None
411 }
412 }
413
414 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
415 let mut aliases = std::collections::HashMap::new();
416 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
417 Some(aliases)
418 }
419
420 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
421 where
422 Self: Sized,
423 {
424 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
425 // Special handling for line_length from global config
426 if rule_config.line_length.get() == 80 {
427 // default value
428 rule_config.line_length = crate::types::LineLength::new(config.global.line_length as usize);
429 }
430 Box::new(Self::from_config_struct(rule_config))
431 }
432}
433
434impl MD013LineLength {
435 /// Generate paragraph-based fixes
436 fn generate_paragraph_fixes(
437 &self,
438 ctx: &crate::lint_context::LintContext,
439 config: &MD013Config,
440 lines: &[&str],
441 ) -> Vec<LintWarning> {
442 let mut warnings = Vec::new();
443 let line_index = LineIndex::new(ctx.content);
444
445 let mut i = 0;
446 while i < lines.len() {
447 let line_num = i + 1;
448
449 // Skip special structures
450 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
451 info.in_code_block
452 || info.in_front_matter
453 || info.in_html_block
454 || info.in_html_comment
455 || info.in_esm_block
456 });
457
458 if should_skip_due_to_line_info
459 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
460 || lines[i].trim().starts_with('#')
461 || TableUtils::is_potential_table_row(lines[i])
462 || lines[i].trim().is_empty()
463 || is_horizontal_rule(lines[i].trim())
464 || is_template_directive_only(lines[i])
465 {
466 i += 1;
467 continue;
468 }
469
470 // Helper function to detect semantic line markers
471 let is_semantic_line = |content: &str| -> bool {
472 let trimmed = content.trim_start();
473 let semantic_markers = [
474 "NOTE:",
475 "WARNING:",
476 "IMPORTANT:",
477 "CAUTION:",
478 "TIP:",
479 "DANGER:",
480 "HINT:",
481 "INFO:",
482 ];
483 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
484 };
485
486 // Helper function to detect fence markers (opening or closing)
487 let is_fence_marker = |content: &str| -> bool {
488 let trimmed = content.trim_start();
489 trimmed.starts_with("```") || trimmed.starts_with("~~~")
490 };
491
492 // Check if this is a list item - handle it specially
493 let trimmed = lines[i].trim();
494 if is_list_item(trimmed) {
495 // Collect the entire list item including continuation lines
496 let list_start = i;
497 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
498 let marker_len = marker.len();
499
500 // Track lines and their types (content, code block, fence, nested list)
/// Classification of one physical line collected for a list item.
#[derive(Clone)]
enum LineType {
    /// Paragraph text with the list indentation removed.
    Content(String),
    /// A line kept verbatim as code: a line inside a code block, a fence
    /// marker, or a deeply indented line.
    CodeBlock(String, usize), // text after the indent, and the original indent width
    /// A nested list item that is kept as part of the parent item.
    NestedListItem(String, usize), // text after the indent, and the original indent width
    SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
    /// A blank line inside the list item.
    Empty,
}
509
510 let mut actual_indent: Option<usize> = None;
511 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
512 i += 1;
513
514 // Collect continuation lines using ctx.lines for metadata
515 while i < lines.len() {
516 let line_info = &ctx.lines[i];
517
518 // Use pre-computed is_blank from ctx
519 if line_info.is_blank {
520 // Empty line - check if next line is indented (part of list item)
521 if i + 1 < lines.len() {
522 let next_info = &ctx.lines[i + 1];
523
524 // Check if next line is indented enough to be continuation
525 if !next_info.is_blank && next_info.indent >= marker_len {
526 // This blank line is between paragraphs/blocks in the list item
527 list_item_lines.push(LineType::Empty);
528 i += 1;
529 continue;
530 }
531 }
532 // No indented line after blank, end of list item
533 break;
534 }
535
536 // Use pre-computed indent from ctx
537 let indent = line_info.indent;
538
539 // Valid continuation must be indented at least marker_len
540 if indent >= marker_len {
541 let trimmed = line_info.content(ctx.content).trim();
542
543 // Use pre-computed in_code_block from ctx
544 if line_info.in_code_block {
545 list_item_lines.push(LineType::CodeBlock(
546 line_info.content(ctx.content)[indent..].to_string(),
547 indent,
548 ));
549 i += 1;
550 continue;
551 }
552
553 // Check if this is a SIBLING list item (breaks parent)
554 // Nested lists are indented >= marker_len and are PART of the parent item
555 // Siblings are at indent < marker_len (at or before parent marker)
556 if is_list_item(trimmed) && indent < marker_len {
557 // This is a sibling item at same or higher level - end parent item
558 break;
559 }
560
561 // Check if this is a NESTED list item marker
562 // Nested lists should be processed separately UNLESS they're part of a
563 // multi-paragraph list item (indicated by a blank line before them OR
564 // it's a continuation of an already-started nested list)
565 if is_list_item(trimmed) && indent >= marker_len {
566 // Check if there was a blank line before this (multi-paragraph context)
567 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
568
569 // Check if we've already seen nested list content (another nested item)
570 let has_nested_content = list_item_lines.iter().any(|line| {
571 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
572 || matches!(line, LineType::NestedListItem(_, _))
573 });
574
575 if !has_blank_before && !has_nested_content {
576 // Single-paragraph context with no prior nested items: starts a new item
577 // End parent collection; nested list will be processed next
578 break;
579 }
580 // else: multi-paragraph context or continuation of nested list, keep collecting
581 // Mark this as a nested list item to preserve its structure
582 list_item_lines.push(LineType::NestedListItem(
583 line_info.content(ctx.content)[indent..].to_string(),
584 indent,
585 ));
586 i += 1;
587 continue;
588 }
589
590 // Normal continuation: marker_len to marker_len+3
591 if indent <= marker_len + 3 {
592 // Set actual_indent from first non-code continuation if not set
593 if actual_indent.is_none() {
594 actual_indent = Some(indent);
595 }
596
597 // Extract content (remove indentation and trailing whitespace)
598 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
599 // See: https://github.com/rvben/rumdl/issues/76
600 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
601
602 // Check if this is a fence marker (opening or closing)
603 // These should be treated as code block lines, not paragraph content
604 if is_fence_marker(&content) {
605 list_item_lines.push(LineType::CodeBlock(content, indent));
606 }
607 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
608 else if is_semantic_line(&content) {
609 list_item_lines.push(LineType::SemanticLine(content));
610 } else {
611 list_item_lines.push(LineType::Content(content));
612 }
613 i += 1;
614 } else {
615 // indent >= marker_len + 4: indented code block
616 list_item_lines.push(LineType::CodeBlock(
617 line_info.content(ctx.content)[indent..].to_string(),
618 indent,
619 ));
620 i += 1;
621 }
622 } else {
623 // Not indented enough, end of list item
624 break;
625 }
626 }
627
628 // Use detected indent or fallback to marker length
629 let indent_size = actual_indent.unwrap_or(marker_len);
630 let expected_indent = " ".repeat(indent_size);
631
632 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
/// A run of consecutive list-item lines of the same kind, built from the
/// collected `LineType` entries.
#[derive(Clone)]
enum Block {
    /// Consecutive plain content lines.
    Paragraph(Vec<String>),
    /// Consecutive code lines (including blank lines seen while inside the code run).
    Code {
        lines: Vec<(String, usize)>, // (content, indent) pairs
        has_preceding_blank: bool,   // Whether there was a blank line before this block
    },
    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
    SemanticLine(String),             // Semantic markers like NOTE:, WARNING: that stay on their own line
    /// Lines from a block-level HTML opening tag (or comment) up to its closing tag.
    Html {
        lines: Vec<String>,        // HTML content preserved exactly as-is
        has_preceding_blank: bool, // Whether there was a blank line before this block
    },
}
647
648 // HTML tag detection helpers
649 // Block-level HTML tags that should trigger HTML block detection
/// Lower-case names of the HTML elements that start an HTML block.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];

/// Detects whether `line` starts a block of raw HTML.
///
/// Returns the lower-cased tag name when the trimmed line begins with an
/// opening tag listed in `BLOCK_LEVEL_TAGS`, or the marker `"!--"` when it
/// begins an HTML comment. Closing tags, other `<!…>` declarations, inline
/// tags and non-HTML text yield `None`.
///
/// The tag name ends at whitespace, `>`, `/`, or the end of the line, so an
/// opening tag whose attributes continue on the following lines (`<div`) is
/// recognised as well.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let trimmed = line.trim();

    // Check for HTML comments
    if trimmed.starts_with("<!--") {
        return Some("!--".to_string());
    }

    // Only opening tags qualify: not `</…` and not `<!…`
    let after_bracket = trimmed.strip_prefix('<')?;
    if after_bracket.starts_with('/') || after_bracket.starts_with('!') {
        return None;
    }

    // Extract tag name from `<tagname ...>`, `<tagname>`, `<tagname/>` or a bare `<tagname`
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let tag_name = after_bracket[..name_end].to_lowercase();

    // Only treat as block if it's a known block-level tag
    BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()).then_some(tag_name)
}
717
/// Reports whether `line` closes the HTML construct identified by `tag_name`.
///
/// `tag_name` is a lower-cased element name as returned by
/// `is_block_html_opening_tag`, or the marker `"!--"` for an HTML comment.
///
/// * For `"!--"` the trimmed line must end with `-->`.
/// * Otherwise the trimmed line must start with `</`, optionally followed by
///   whitespace, then the tag name compared ASCII case-insensitively (opening
///   names are lower-cased, so `</DIV>` has to close `div`). The name must
///   be followed by `>`, whitespace or the end of the line, so `</pre>` does
///   not close `p` and `</thead>` does not close `th`.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let Some(rest) = trimmed.strip_prefix("</") else {
        return false;
    };
    let rest = rest.trim_start();

    // `get` returns None when the line is shorter than the tag name or the
    // cut would fall inside a multi-byte character.
    match rest.get(..tag_name.len()) {
        Some(candidate) if candidate.eq_ignore_ascii_case(tag_name) => rest[tag_name.len()..]
            .chars()
            .next()
            .map_or(true, |c| c == '>' || c.is_whitespace()),
        _ => false,
    }
}
731
/// Reports whether the HTML construct opened on `line` is already complete
/// on that same line, i.e. no closing tag has to be awaited on a later line.
///
/// This is the case when the trimmed line
/// * ends with `/>` (XML-style self-closing tag),
/// * is a one-line comment (`<!-- … -->`), or
/// * starts with a void element such as `<hr>` or `<br>`, which never has a
///   closing tag.
///
/// Without the last two cases a caller that waits for a matching closing
/// line would never see one and would treat all following text as HTML.
fn is_self_closing_tag(line: &str) -> bool {
    /// Elements that never have an end tag.
    const VOID_ELEMENTS: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source", "track", "wbr",
    ];

    let trimmed = line.trim();

    if trimmed.ends_with("/>") {
        return true;
    }

    // Comment that opens and closes on the same line. The terminator is
    // looked for after the opener so that `<!-->` is not mistaken for one.
    if let Some(rest) = trimmed.strip_prefix("<!--") {
        return rest.ends_with("-->");
    }

    // Void element: compare the tag name ASCII case-insensitively
    trimmed.strip_prefix('<').is_some_and(|rest| {
        let name_end = rest
            .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
            .unwrap_or(rest.len());
        let name = &rest[..name_end];
        VOID_ELEMENTS.iter().any(|void| name.eq_ignore_ascii_case(void))
    })
}
736
737 let mut blocks: Vec<Block> = Vec::new();
738 let mut current_paragraph: Vec<String> = Vec::new();
739 let mut current_code_block: Vec<(String, usize)> = Vec::new();
740 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
741 let mut current_html_block: Vec<String> = Vec::new();
742 let mut html_tag_stack: Vec<String> = Vec::new();
743 let mut in_code = false;
744 let mut in_nested_list = false;
745 let mut in_html_block = false;
746 let mut had_preceding_blank = false; // Track if we just saw an empty line
747 let mut code_block_has_preceding_blank = false; // Track blank before current code block
748 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
749
750 for line in &list_item_lines {
751 match line {
752 LineType::Empty => {
753 if in_code {
754 current_code_block.push((String::new(), 0));
755 } else if in_nested_list {
756 current_nested_list.push((String::new(), 0));
757 } else if in_html_block {
758 // Allow blank lines inside HTML blocks
759 current_html_block.push(String::new());
760 } else if !current_paragraph.is_empty() {
761 blocks.push(Block::Paragraph(current_paragraph.clone()));
762 current_paragraph.clear();
763 }
764 // Mark that we saw a blank line
765 had_preceding_blank = true;
766 }
767 LineType::Content(content) => {
768 // Check if we're currently in an HTML block
769 if in_html_block {
770 current_html_block.push(content.clone());
771
772 // Check if this line closes any open HTML tags
773 if let Some(last_tag) = html_tag_stack.last() {
774 if is_html_closing_tag(content, last_tag) {
775 html_tag_stack.pop();
776
777 // If stack is empty, HTML block is complete
778 if html_tag_stack.is_empty() {
779 blocks.push(Block::Html {
780 lines: current_html_block.clone(),
781 has_preceding_blank: html_block_has_preceding_blank,
782 });
783 current_html_block.clear();
784 in_html_block = false;
785 }
786 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
787 // Nested opening tag within HTML block
788 if !is_self_closing_tag(content) {
789 html_tag_stack.push(new_tag);
790 }
791 }
792 }
793 had_preceding_blank = false;
794 } else {
795 // Not in HTML block - check if this line starts one
796 if let Some(tag_name) = is_block_html_opening_tag(content) {
797 // Flush current paragraph before starting HTML block
798 if in_code {
799 blocks.push(Block::Code {
800 lines: current_code_block.clone(),
801 has_preceding_blank: code_block_has_preceding_blank,
802 });
803 current_code_block.clear();
804 in_code = false;
805 } else if in_nested_list {
806 blocks.push(Block::NestedList(current_nested_list.clone()));
807 current_nested_list.clear();
808 in_nested_list = false;
809 } else if !current_paragraph.is_empty() {
810 blocks.push(Block::Paragraph(current_paragraph.clone()));
811 current_paragraph.clear();
812 }
813
814 // Start new HTML block
815 in_html_block = true;
816 html_block_has_preceding_blank = had_preceding_blank;
817 current_html_block.push(content.clone());
818
819 // Check if it's self-closing or needs a closing tag
820 if is_self_closing_tag(content) {
821 // Self-closing tag - complete the HTML block immediately
822 blocks.push(Block::Html {
823 lines: current_html_block.clone(),
824 has_preceding_blank: html_block_has_preceding_blank,
825 });
826 current_html_block.clear();
827 in_html_block = false;
828 } else {
829 // Regular opening tag - push to stack
830 html_tag_stack.push(tag_name);
831 }
832 } else {
833 // Regular content line - add to paragraph
834 if in_code {
835 // Switching from code to content
836 blocks.push(Block::Code {
837 lines: current_code_block.clone(),
838 has_preceding_blank: code_block_has_preceding_blank,
839 });
840 current_code_block.clear();
841 in_code = false;
842 } else if in_nested_list {
843 // Switching from nested list to content
844 blocks.push(Block::NestedList(current_nested_list.clone()));
845 current_nested_list.clear();
846 in_nested_list = false;
847 }
848 current_paragraph.push(content.clone());
849 }
850 had_preceding_blank = false; // Reset after content
851 }
852 }
853 LineType::CodeBlock(content, indent) => {
854 if in_nested_list {
855 // Switching from nested list to code
856 blocks.push(Block::NestedList(current_nested_list.clone()));
857 current_nested_list.clear();
858 in_nested_list = false;
859 } else if in_html_block {
860 // Switching from HTML block to code (shouldn't happen normally, but handle it)
861 blocks.push(Block::Html {
862 lines: current_html_block.clone(),
863 has_preceding_blank: html_block_has_preceding_blank,
864 });
865 current_html_block.clear();
866 html_tag_stack.clear();
867 in_html_block = false;
868 }
869 if !in_code {
870 // Switching from content to code
871 if !current_paragraph.is_empty() {
872 blocks.push(Block::Paragraph(current_paragraph.clone()));
873 current_paragraph.clear();
874 }
875 in_code = true;
876 // Record whether there was a blank line before this code block
877 code_block_has_preceding_blank = had_preceding_blank;
878 }
879 current_code_block.push((content.clone(), *indent));
880 had_preceding_blank = false; // Reset after code
881 }
882 LineType::NestedListItem(content, indent) => {
883 if in_code {
884 // Switching from code to nested list
885 blocks.push(Block::Code {
886 lines: current_code_block.clone(),
887 has_preceding_blank: code_block_has_preceding_blank,
888 });
889 current_code_block.clear();
890 in_code = false;
891 } else if in_html_block {
892 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
893 blocks.push(Block::Html {
894 lines: current_html_block.clone(),
895 has_preceding_blank: html_block_has_preceding_blank,
896 });
897 current_html_block.clear();
898 html_tag_stack.clear();
899 in_html_block = false;
900 }
901 if !in_nested_list {
902 // Switching from content to nested list
903 if !current_paragraph.is_empty() {
904 blocks.push(Block::Paragraph(current_paragraph.clone()));
905 current_paragraph.clear();
906 }
907 in_nested_list = true;
908 }
909 current_nested_list.push((content.clone(), *indent));
910 had_preceding_blank = false; // Reset after nested list
911 }
912 LineType::SemanticLine(content) => {
913 // Semantic lines are standalone - flush any current block and add as separate block
914 if in_code {
915 blocks.push(Block::Code {
916 lines: current_code_block.clone(),
917 has_preceding_blank: code_block_has_preceding_blank,
918 });
919 current_code_block.clear();
920 in_code = false;
921 } else if in_nested_list {
922 blocks.push(Block::NestedList(current_nested_list.clone()));
923 current_nested_list.clear();
924 in_nested_list = false;
925 } else if in_html_block {
926 blocks.push(Block::Html {
927 lines: current_html_block.clone(),
928 has_preceding_blank: html_block_has_preceding_blank,
929 });
930 current_html_block.clear();
931 html_tag_stack.clear();
932 in_html_block = false;
933 } else if !current_paragraph.is_empty() {
934 blocks.push(Block::Paragraph(current_paragraph.clone()));
935 current_paragraph.clear();
936 }
937 // Add semantic line as its own block
938 blocks.push(Block::SemanticLine(content.clone()));
939 had_preceding_blank = false; // Reset after semantic line
940 }
941 }
942 }
943
944 // Push remaining block
945 if in_code && !current_code_block.is_empty() {
946 blocks.push(Block::Code {
947 lines: current_code_block,
948 has_preceding_blank: code_block_has_preceding_blank,
949 });
950 } else if in_nested_list && !current_nested_list.is_empty() {
951 blocks.push(Block::NestedList(current_nested_list));
952 } else if in_html_block && !current_html_block.is_empty() {
953 // If we still have an unclosed HTML block, push it anyway
954 // (malformed HTML - missing closing tag)
955 blocks.push(Block::Html {
956 lines: current_html_block,
957 has_preceding_blank: html_block_has_preceding_blank,
958 });
959 } else if !current_paragraph.is_empty() {
960 blocks.push(Block::Paragraph(current_paragraph));
961 }
962
963 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
964 let content_lines: Vec<String> = list_item_lines
965 .iter()
966 .filter_map(|line| {
967 if let LineType::Content(s) = line {
968 Some(s.clone())
969 } else {
970 None
971 }
972 })
973 .collect();
974
975 // Check if we need to reflow this list item
976 // We check the combined content to see if it exceeds length limits
977 let combined_content = content_lines.join(" ").trim().to_string();
978 let full_line = format!("{marker}{combined_content}");
979
980 // Helper to check if we should reflow in normalize mode
981 let should_normalize = || {
982 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
983 // DO normalize if it has plain text content that spans multiple lines
984 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
985 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
986 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
987 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
988
989 // If we have nested lists, code blocks, or semantic lines but no paragraphs, don't normalize
990 if (has_nested_lists || has_code_blocks || has_semantic_lines) && !has_paragraphs {
991 return false;
992 }
993
994 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
995 if has_paragraphs {
996 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
997 if paragraph_count > 1 {
998 // Multiple paragraph blocks should be normalized
999 return true;
1000 }
1001
1002 // Single paragraph block: normalize if it has multiple content lines
1003 if content_lines.len() > 1 {
1004 return true;
1005 }
1006 }
1007
1008 false
1009 };
1010
1011 let needs_reflow = match config.reflow_mode {
1012 ReflowMode::Normalize => {
1013 // Only reflow if:
1014 // 1. The combined line would exceed the limit, OR
1015 // 2. The list item should be normalized (has multi-line plain text)
1016 let combined_length = self.calculate_effective_length(&full_line);
1017 if combined_length > config.line_length.get() {
1018 true
1019 } else {
1020 should_normalize()
1021 }
1022 }
1023 ReflowMode::SentencePerLine => {
1024 // Check if list item has multiple sentences
1025 let sentences = split_into_sentences(&combined_content);
1026 sentences.len() > 1
1027 }
1028 ReflowMode::Default => {
1029 // In default mode, only reflow if lines exceed limit
1030 self.calculate_effective_length(&full_line) > config.line_length.get()
1031 }
1032 };
1033
1034 if needs_reflow {
1035 let start_range = line_index.whole_line_range(list_start + 1);
1036 let end_line = i - 1;
1037 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1038 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1039 } else {
1040 line_index.whole_line_range(end_line + 1)
1041 };
1042 let byte_range = start_range.start..end_range.end;
1043
1044 // Reflow each block (paragraphs only, preserve code blocks)
1045 // When line_length = 0 (no limit), use a very large value for reflow
1046 let reflow_line_length = if config.line_length.is_unlimited() {
1047 usize::MAX
1048 } else {
1049 config.line_length.get().saturating_sub(indent_size).max(1)
1050 };
1051 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1052 line_length: reflow_line_length,
1053 break_on_sentences: true,
1054 preserve_breaks: false,
1055 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1056 abbreviations: config.abbreviations.clone(),
1057 };
1058
1059 let mut result: Vec<String> = Vec::new();
1060 let mut is_first_block = true;
1061
1062 for (block_idx, block) in blocks.iter().enumerate() {
1063 match block {
1064 Block::Paragraph(para_lines) => {
1065 // Split the paragraph into segments at hard break boundaries
1066 // Each segment can be reflowed independently
1067 let segments = split_into_segments(para_lines);
1068
1069 for (segment_idx, segment) in segments.iter().enumerate() {
1070 // Check if this segment ends with a hard break and what type
1071 let hard_break_type = segment.last().and_then(|line| {
1072 let line = line.strip_suffix('\r').unwrap_or(line);
1073 if line.ends_with('\\') {
1074 Some("\\")
1075 } else if line.ends_with(" ") {
1076 Some(" ")
1077 } else {
1078 None
1079 }
1080 });
1081
1082 // Join and reflow the segment (removing the hard break marker for processing)
1083 let segment_for_reflow: Vec<String> = segment
1084 .iter()
1085 .map(|line| {
1086 // Strip hard break marker (2 spaces or backslash) for reflow processing
1087 if line.ends_with('\\') {
1088 line[..line.len() - 1].trim_end().to_string()
1089 } else if line.ends_with(" ") {
1090 line[..line.len() - 2].trim_end().to_string()
1091 } else {
1092 line.clone()
1093 }
1094 })
1095 .collect();
1096
1097 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1098 if !segment_text.is_empty() {
1099 let reflowed =
1100 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1101
1102 if is_first_block && segment_idx == 0 {
1103 // First segment of first block starts with marker
1104 result.push(format!("{marker}{}", reflowed[0]));
1105 for line in reflowed.iter().skip(1) {
1106 result.push(format!("{expected_indent}{line}"));
1107 }
1108 is_first_block = false;
1109 } else {
1110 // Subsequent segments
1111 for line in reflowed {
1112 result.push(format!("{expected_indent}{line}"));
1113 }
1114 }
1115
1116 // If this segment had a hard break, add it back to the last line
1117 // Preserve the original hard break format (backslash or two spaces)
1118 if let Some(break_marker) = hard_break_type
1119 && let Some(last_line) = result.last_mut()
1120 {
1121 last_line.push_str(break_marker);
1122 }
1123 }
1124 }
1125
1126 // Add blank line after paragraph block if there's a next block
1127 // BUT: check if next block is a code block that doesn't want a preceding blank
1128 if block_idx < blocks.len() - 1 {
1129 let next_block = &blocks[block_idx + 1];
1130 let should_add_blank = match next_block {
1131 Block::Code {
1132 has_preceding_blank, ..
1133 } => *has_preceding_blank,
1134 _ => true, // For all other blocks, add blank line
1135 };
1136 if should_add_blank {
1137 result.push(String::new());
1138 }
1139 }
1140 }
1141 Block::Code {
1142 lines: code_lines,
1143 has_preceding_blank: _,
1144 } => {
1145 // Preserve code blocks as-is with original indentation
1146 // NOTE: Blank line before code block is handled by the previous block
1147 // (see paragraph block's logic above)
1148
1149 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1150 if is_first_block && idx == 0 {
1151 // First line of first block gets marker
1152 result.push(format!(
1153 "{marker}{}",
1154 " ".repeat(orig_indent - marker_len) + content
1155 ));
1156 is_first_block = false;
1157 } else if content.is_empty() {
1158 result.push(String::new());
1159 } else {
1160 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1161 }
1162 }
1163 }
1164 Block::NestedList(nested_items) => {
1165 // Preserve nested list items as-is with original indentation
1166 if !is_first_block {
1167 result.push(String::new());
1168 }
1169
1170 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1171 if is_first_block && idx == 0 {
1172 // First line of first block gets marker
1173 result.push(format!(
1174 "{marker}{}",
1175 " ".repeat(orig_indent - marker_len) + content
1176 ));
1177 is_first_block = false;
1178 } else if content.is_empty() {
1179 result.push(String::new());
1180 } else {
1181 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1182 }
1183 }
1184
1185 // Add blank line after nested list if there's a next block
1186 // Check if next block is a code block that doesn't want a preceding blank
1187 if block_idx < blocks.len() - 1 {
1188 let next_block = &blocks[block_idx + 1];
1189 let should_add_blank = match next_block {
1190 Block::Code {
1191 has_preceding_blank, ..
1192 } => *has_preceding_blank,
1193 _ => true, // For all other blocks, add blank line
1194 };
1195 if should_add_blank {
1196 result.push(String::new());
1197 }
1198 }
1199 }
1200 Block::SemanticLine(content) => {
1201 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1202 // Add blank line before if not first block
1203 if !is_first_block {
1204 result.push(String::new());
1205 }
1206
1207 if is_first_block {
1208 // First block starts with marker
1209 result.push(format!("{marker}{content}"));
1210 is_first_block = false;
1211 } else {
1212 // Subsequent blocks use expected indent
1213 result.push(format!("{expected_indent}{content}"));
1214 }
1215
1216 // Add blank line after semantic line if there's a next block
1217 // Check if next block is a code block that doesn't want a preceding blank
1218 if block_idx < blocks.len() - 1 {
1219 let next_block = &blocks[block_idx + 1];
1220 let should_add_blank = match next_block {
1221 Block::Code {
1222 has_preceding_blank, ..
1223 } => *has_preceding_blank,
1224 _ => true, // For all other blocks, add blank line
1225 };
1226 if should_add_blank {
1227 result.push(String::new());
1228 }
1229 }
1230 }
1231 Block::Html {
1232 lines: html_lines,
1233 has_preceding_blank: _,
1234 } => {
1235 // Preserve HTML blocks exactly as-is with original indentation
1236 // NOTE: Blank line before HTML block is handled by the previous block
1237
1238 for (idx, line) in html_lines.iter().enumerate() {
1239 if is_first_block && idx == 0 {
1240 // First line of first block gets marker
1241 result.push(format!("{marker}{line}"));
1242 is_first_block = false;
1243 } else if line.is_empty() {
1244 // Preserve blank lines inside HTML blocks
1245 result.push(String::new());
1246 } else {
1247 // Preserve lines with their original content (already includes indentation)
1248 result.push(format!("{expected_indent}{line}"));
1249 }
1250 }
1251
1252 // Add blank line after HTML block if there's a next block
1253 if block_idx < blocks.len() - 1 {
1254 let next_block = &blocks[block_idx + 1];
1255 let should_add_blank = match next_block {
1256 Block::Code {
1257 has_preceding_blank, ..
1258 } => *has_preceding_blank,
1259 Block::Html {
1260 has_preceding_blank, ..
1261 } => *has_preceding_blank,
1262 _ => true, // For all other blocks, add blank line
1263 };
1264 if should_add_blank {
1265 result.push(String::new());
1266 }
1267 }
1268 }
1269 }
1270 }
1271
1272 let reflowed_text = result.join("\n");
1273
1274 // Preserve trailing newline
1275 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1276 format!("{reflowed_text}\n")
1277 } else {
1278 reflowed_text
1279 };
1280
1281 // Get the original text to compare
1282 let original_text = &ctx.content[byte_range.clone()];
1283
1284 // Only generate a warning if the replacement is different from the original
1285 if original_text != replacement {
1286 // Generate an appropriate message based on why reflow is needed
1287 let message = match config.reflow_mode {
1288 ReflowMode::SentencePerLine => {
1289 let num_sentences = split_into_sentences(&combined_content).len();
1290 let num_lines = content_lines.len();
1291 if num_lines == 1 {
1292 // Single line with multiple sentences
1293 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1294 } else {
1295 // Multiple lines - could be split sentences or mixed
1296 format!(
1297 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1298 )
1299 }
1300 }
1301 ReflowMode::Normalize => {
1302 let combined_length = self.calculate_effective_length(&full_line);
1303 if combined_length > config.line_length.get() {
1304 format!(
1305 "Line length {} exceeds {} characters",
1306 combined_length,
1307 config.line_length.get()
1308 )
1309 } else {
1310 "Multi-line content can be normalized".to_string()
1311 }
1312 }
1313 ReflowMode::Default => {
1314 let combined_length = self.calculate_effective_length(&full_line);
1315 format!(
1316 "Line length {} exceeds {} characters",
1317 combined_length,
1318 config.line_length.get()
1319 )
1320 }
1321 };
1322
1323 warnings.push(LintWarning {
1324 rule_name: Some(self.name().to_string()),
1325 message,
1326 line: list_start + 1,
1327 column: 1,
1328 end_line: end_line + 1,
1329 end_column: lines[end_line].len() + 1,
1330 severity: Severity::Warning,
1331 fix: Some(crate::rule::Fix {
1332 range: byte_range,
1333 replacement,
1334 }),
1335 });
1336 }
1337 }
1338 continue;
1339 }
1340
1341 // Found start of a paragraph - collect all lines in it
1342 let paragraph_start = i;
1343 let mut paragraph_lines = vec![lines[i]];
1344 i += 1;
1345
1346 while i < lines.len() {
1347 let next_line = lines[i];
1348 let next_line_num = i + 1;
1349 let next_trimmed = next_line.trim();
1350
1351 // Stop at paragraph boundaries
1352 if next_trimmed.is_empty()
1353 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1354 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1355 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1356 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1357 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1358 || (next_line_num > 0
1359 && next_line_num <= ctx.lines.len()
1360 && ctx.lines[next_line_num - 1].blockquote.is_some())
1361 || next_trimmed.starts_with('#')
1362 || TableUtils::is_potential_table_row(next_line)
1363 || is_list_item(next_trimmed)
1364 || is_horizontal_rule(next_trimmed)
1365 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1366 || is_template_directive_only(next_line)
1367 {
1368 break;
1369 }
1370
1371 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1372 if i > 0 && has_hard_break(lines[i - 1]) {
1373 // Don't include lines after hard breaks in the same paragraph
1374 break;
1375 }
1376
1377 paragraph_lines.push(next_line);
1378 i += 1;
1379 }
1380
1381 // Combine paragraph lines into a single string for processing
1382 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1383 let paragraph_text = paragraph_lines.join(" ");
1384
1385 // Skip reflowing if this paragraph contains definition list items
1386 // Definition lists are multi-line structures that should not be joined
1387 let contains_definition_list = paragraph_lines
1388 .iter()
1389 .any(|line| crate::utils::is_definition_list_item(line));
1390
1391 if contains_definition_list {
1392 // Don't reflow definition lists - skip this paragraph
1393 i = paragraph_start + paragraph_lines.len();
1394 continue;
1395 }
1396
1397 // Check if this paragraph needs reflowing
1398 let needs_reflow = match config.reflow_mode {
1399 ReflowMode::Normalize => {
1400 // In normalize mode, reflow multi-line paragraphs
1401 paragraph_lines.len() > 1
1402 }
1403 ReflowMode::SentencePerLine => {
1404 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1405 // Note: we check the joined text because sentences can span multiple lines
1406 let sentences = split_into_sentences(¶graph_text);
1407
1408 // Always reflow if multiple sentences on one line
1409 if sentences.len() > 1 {
1410 true
1411 } else if paragraph_lines.len() > 1 {
1412 // For single-sentence paragraphs spanning multiple lines:
1413 // Reflow if they COULD fit on one line (respecting line-length constraint)
1414 if config.line_length.is_unlimited() {
1415 // No line-length constraint - always join single sentences
1416 true
1417 } else {
1418 // Only join if it fits within line-length
1419 let effective_length = self.calculate_effective_length(¶graph_text);
1420 effective_length <= config.line_length.get()
1421 }
1422 } else {
1423 false
1424 }
1425 }
1426 ReflowMode::Default => {
1427 // In default mode, only reflow if lines exceed limit
1428 paragraph_lines
1429 .iter()
1430 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1431 }
1432 };
1433
1434 if needs_reflow {
1435 // Calculate byte range for this paragraph
1436 // Use whole_line_range for each line and combine
1437 let start_range = line_index.whole_line_range(paragraph_start + 1);
1438 let end_line = paragraph_start + paragraph_lines.len() - 1;
1439
1440 // For the last line, we want to preserve any trailing newline
1441 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1442 // Last line without trailing newline - use line_text_range
1443 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1444 } else {
1445 // Not the last line or has trailing newline - use whole_line_range
1446 line_index.whole_line_range(end_line + 1)
1447 };
1448
1449 let byte_range = start_range.start..end_range.end;
1450
1451 // Check if the paragraph ends with a hard break and what type
1452 let hard_break_type = paragraph_lines.last().and_then(|line| {
1453 let line = line.strip_suffix('\r').unwrap_or(line);
1454 if line.ends_with('\\') {
1455 Some("\\")
1456 } else if line.ends_with(" ") {
1457 Some(" ")
1458 } else {
1459 None
1460 }
1461 });
1462
1463 // Reflow the paragraph
1464 // When line_length = 0 (no limit), use a very large value for reflow
1465 let reflow_line_length = if config.line_length.is_unlimited() {
1466 usize::MAX
1467 } else {
1468 config.line_length.get()
1469 };
1470 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1471 line_length: reflow_line_length,
1472 break_on_sentences: true,
1473 preserve_breaks: false,
1474 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1475 abbreviations: config.abbreviations.clone(),
1476 };
1477 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1478
1479 // If the original paragraph ended with a hard break, preserve it
1480 // Preserve the original hard break format (backslash or two spaces)
1481 if let Some(break_marker) = hard_break_type
1482 && !reflowed.is_empty()
1483 {
1484 let last_idx = reflowed.len() - 1;
1485 if !has_hard_break(&reflowed[last_idx]) {
1486 reflowed[last_idx].push_str(break_marker);
1487 }
1488 }
1489
1490 let reflowed_text = reflowed.join("\n");
1491
1492 // Preserve trailing newline if the original paragraph had one
1493 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1494 format!("{reflowed_text}\n")
1495 } else {
1496 reflowed_text
1497 };
1498
1499 // Get the original text to compare
1500 let original_text = &ctx.content[byte_range.clone()];
1501
1502 // Only generate a warning if the replacement is different from the original
1503 if original_text != replacement {
1504 // Create warning with actual fix
1505 // In default mode, report the specific line that violates
1506 // In normalize mode, report the whole paragraph
1507 // In sentence-per-line mode, report the entire paragraph
1508 let (warning_line, warning_end_line) = match config.reflow_mode {
1509 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1510 ReflowMode::SentencePerLine => {
1511 // Highlight the entire paragraph that needs reformatting
1512 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1513 }
1514 ReflowMode::Default => {
1515 // Find the first line that exceeds the limit
1516 let mut violating_line = paragraph_start;
1517 for (idx, line) in paragraph_lines.iter().enumerate() {
1518 if self.calculate_effective_length(line) > config.line_length.get() {
1519 violating_line = paragraph_start + idx;
1520 break;
1521 }
1522 }
1523 (violating_line + 1, violating_line + 1)
1524 }
1525 };
1526
1527 warnings.push(LintWarning {
1528 rule_name: Some(self.name().to_string()),
1529 message: match config.reflow_mode {
1530 ReflowMode::Normalize => format!(
1531 "Paragraph could be normalized to use line length of {} characters",
1532 config.line_length.get()
1533 ),
1534 ReflowMode::SentencePerLine => {
1535 let num_sentences = split_into_sentences(¶graph_text).len();
1536 if paragraph_lines.len() == 1 {
1537 // Single line with multiple sentences
1538 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1539 } else {
1540 let num_lines = paragraph_lines.len();
1541 // Multiple lines - could be split sentences or mixed
1542 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1543 }
1544 },
1545 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1546 },
1547 line: warning_line,
1548 column: 1,
1549 end_line: warning_end_line,
1550 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1551 severity: Severity::Warning,
1552 fix: Some(crate::rule::Fix {
1553 range: byte_range,
1554 replacement,
1555 }),
1556 });
1557 }
1558 }
1559 }
1560
1561 warnings
1562 }
1563
1564 /// Calculate string length based on the configured length mode
1565 fn calculate_string_length(&self, s: &str) -> usize {
1566 match self.config.length_mode {
1567 LengthMode::Chars => s.chars().count(),
1568 LengthMode::Visual => s.width(),
1569 LengthMode::Bytes => s.len(),
1570 }
1571 }
1572
1573 /// Calculate effective line length excluding unbreakable URLs
1574 fn calculate_effective_length(&self, line: &str) -> usize {
1575 if self.config.strict {
1576 // In strict mode, count everything
1577 return self.calculate_string_length(line);
1578 }
1579
1580 // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1581 let bytes = line.as_bytes();
1582 if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1583 return self.calculate_string_length(line);
1584 }
1585
1586 // More precise check for URLs and links
1587 if !line.contains("http") && !line.contains('[') {
1588 return self.calculate_string_length(line);
1589 }
1590
1591 let mut effective_line = line.to_string();
1592
1593 // First handle markdown links to avoid double-counting URLs
1594 // Pattern: [text](very-long-url) -> [text](url)
1595 if line.contains('[') && line.contains("](") {
1596 for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1597 if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1598 && url.as_str().len() > 15
1599 {
1600 let replacement = format!("[{}](url)", text.as_str());
1601 effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1602 }
1603 }
1604 }
1605
1606 // Then replace bare URLs with a placeholder of reasonable length
1607 // This allows lines with long URLs to pass if the rest of the content is reasonable
1608 if effective_line.contains("http") {
1609 for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1610 let url = url_match.as_str();
1611 // Skip if this URL is already part of a markdown link we handled
1612 if !effective_line.contains(&format!("({url})")) {
1613 // Replace URL with placeholder that represents a "reasonable" URL length
1614 // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1615 let placeholder = "x".repeat(15.min(url.len()));
1616 effective_line = effective_line.replacen(url, &placeholder, 1);
1617 }
1618 }
1619 }
1620
1621 self.calculate_string_length(&effective_line)
1622 }
1623}