rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::range_utils::LineIndex;
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9 IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19 split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
28#[derive(Clone, Default)]
29pub struct MD013LineLength {
30 pub(crate) config: MD013Config,
31}
32
33impl MD013LineLength {
34 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35 Self {
36 config: MD013Config {
37 line_length: crate::types::LineLength::new(line_length),
38 code_blocks,
39 tables,
40 headings,
41 paragraphs: true, // Default to true for backwards compatibility
42 strict,
43 reflow: false,
44 reflow_mode: ReflowMode::default(),
45 length_mode: LengthMode::default(),
46 abbreviations: None,
47 },
48 }
49 }
50
51 pub fn from_config_struct(config: MD013Config) -> Self {
52 Self { config }
53 }
54
55 fn should_ignore_line(
56 &self,
57 line: &str,
58 _lines: &[&str],
59 current_line: usize,
60 ctx: &crate::lint_context::LintContext,
61 ) -> bool {
62 if self.config.strict {
63 return false;
64 }
65
66 // Quick check for common patterns before expensive regex
67 let trimmed = line.trim();
68
69 // Only skip if the entire line is a URL (quick check first)
70 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71 return true;
72 }
73
74 // Only skip if the entire line is an image reference (quick check first)
75 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76 return true;
77 }
78
79 // Only skip if the entire line is a link reference (quick check first)
80 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81 return true;
82 }
83
84 // Code blocks with long strings (only check if in code block)
85 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86 && !trimmed.is_empty()
87 && !line.contains(' ')
88 && !line.contains('\t')
89 {
90 return true;
91 }
92
93 false
94 }
95}
96
97impl Rule for MD013LineLength {
98 fn name(&self) -> &'static str {
99 "MD013"
100 }
101
102 fn description(&self) -> &'static str {
103 "Line length should not be excessive"
104 }
105
106 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
107 let content = ctx.content;
108
109 // Fast early return using should_skip
110 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
111 if self.should_skip(ctx)
112 && !(self.config.reflow
113 && (self.config.reflow_mode == ReflowMode::Normalize
114 || self.config.reflow_mode == ReflowMode::SentencePerLine))
115 {
116 return Ok(Vec::new());
117 }
118
119 // Direct implementation without DocumentStructure
120 let mut warnings = Vec::new();
121
122 // Check for inline configuration overrides
123 let inline_config = crate::inline_config::InlineConfig::from_content(content);
124 let config_override = inline_config.get_rule_config("MD013");
125
126 // Apply configuration override if present
127 let effective_config = if let Some(json_config) = config_override {
128 if let Some(obj) = json_config.as_object() {
129 let mut config = self.config.clone();
130 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
131 config.line_length = crate::types::LineLength::new(line_length as usize);
132 }
133 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
134 config.code_blocks = code_blocks;
135 }
136 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
137 config.tables = tables;
138 }
139 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
140 config.headings = headings;
141 }
142 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
143 config.strict = strict;
144 }
145 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
146 config.reflow = reflow;
147 }
148 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
149 config.reflow_mode = match reflow_mode {
150 "default" => ReflowMode::Default,
151 "normalize" => ReflowMode::Normalize,
152 "sentence-per-line" => ReflowMode::SentencePerLine,
153 _ => ReflowMode::default(),
154 };
155 }
156 config
157 } else {
158 self.config.clone()
159 }
160 } else {
161 self.config.clone()
162 };
163
164 // Special handling: line_length = 0 means "no line length limit"
165 // Skip all line length checks, but still allow reflow if enabled
166 let skip_length_checks = effective_config.line_length.is_unlimited();
167
168 // Pre-filter lines that could be problematic to avoid processing all lines
169 let mut candidate_lines = Vec::new();
170 if !skip_length_checks {
171 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
172 // Skip front matter - it should never be linted
173 if line_info.in_front_matter {
174 continue;
175 }
176
177 // Quick length check first
178 if line_info.byte_len > effective_config.line_length.get() {
179 candidate_lines.push(line_idx);
180 }
181 }
182 }
183
184 // If no candidate lines and not in normalize or sentence-per-line mode, early return
185 if candidate_lines.is_empty()
186 && !(effective_config.reflow
187 && (effective_config.reflow_mode == ReflowMode::Normalize
188 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
189 {
190 return Ok(warnings);
191 }
192
193 // Use ctx.lines if available for better performance
194 let lines: Vec<&str> = if !ctx.lines.is_empty() {
195 ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
196 } else {
197 content.lines().collect()
198 };
199
200 // Create a quick lookup set for heading lines
201 // We need this for both the heading skip check AND the paragraphs check
202 let heading_lines_set: std::collections::HashSet<usize> = ctx
203 .lines
204 .iter()
205 .enumerate()
206 .filter(|(_, line)| line.heading.is_some())
207 .map(|(idx, _)| idx + 1)
208 .collect();
209
210 // Use pre-computed table blocks from context
211 // We need this for both the table skip check AND the paragraphs check
212 let table_blocks = &ctx.table_blocks;
213 let mut table_lines_set = std::collections::HashSet::new();
214 for table in table_blocks {
215 table_lines_set.insert(table.header_line + 1);
216 table_lines_set.insert(table.delimiter_line + 1);
217 for &line in &table.content_lines {
218 table_lines_set.insert(line + 1);
219 }
220 }
221
222 // Process candidate lines for line length checks
223 for &line_idx in &candidate_lines {
224 let line_number = line_idx + 1;
225 let line = lines[line_idx];
226
227 // Calculate effective length excluding unbreakable URLs
228 let effective_length = self.calculate_effective_length(line);
229
230 // Use single line length limit for all content
231 let line_limit = effective_config.line_length.get();
232
233 // Skip short lines immediately (double-check after effective length calculation)
234 if effective_length <= line_limit {
235 continue;
236 }
237
238 // Skip mkdocstrings blocks (already handled by LintContext)
239 if ctx.lines[line_idx].in_mkdocstrings {
240 continue;
241 }
242
243 // Skip various block types efficiently
244 if !effective_config.strict {
245 // Skip setext heading underlines
246 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
247 continue;
248 }
249
250 // Skip block elements according to config flags
251 // The flags mean: true = check these elements, false = skip these elements
252 // So we skip when the flag is FALSE and the line is in that element type
253 if (!effective_config.headings && heading_lines_set.contains(&line_number))
254 || (!effective_config.code_blocks
255 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
256 || (!effective_config.tables && table_lines_set.contains(&line_number))
257 || ctx.lines[line_number - 1].blockquote.is_some()
258 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
259 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
260 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
261 {
262 continue;
263 }
264
265 // Check if this is a paragraph/regular text line
266 // If paragraphs = false, skip lines that are NOT in special blocks
267 if !effective_config.paragraphs {
268 let is_special_block = heading_lines_set.contains(&line_number)
269 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
270 || table_lines_set.contains(&line_number)
271 || ctx.lines[line_number - 1].blockquote.is_some()
272 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
273 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
274 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block);
275
276 // Skip regular paragraph text when paragraphs = false
277 if !is_special_block {
278 continue;
279 }
280 }
281
282 // Skip lines that are only a URL, image ref, or link ref
283 if self.should_ignore_line(line, &lines, line_idx, ctx) {
284 continue;
285 }
286 }
287
288 // In sentence-per-line mode, check if this is a single long sentence
289 // If so, emit a warning without a fix (user must manually rephrase)
290 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
291 let sentences = split_into_sentences(line.trim());
292 if sentences.len() == 1 {
293 // Single sentence that's too long - warn but don't auto-fix
294 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
295
296 let (start_line, start_col, end_line, end_col) =
297 calculate_excess_range(line_number, line, line_limit);
298
299 warnings.push(LintWarning {
300 rule_name: Some(self.name().to_string()),
301 message,
302 line: start_line,
303 column: start_col,
304 end_line,
305 end_column: end_col,
306 severity: Severity::Warning,
307 fix: None, // No auto-fix for long single sentences
308 });
309 continue;
310 }
311 // Multiple sentences will be handled by paragraph-based reflow
312 continue;
313 }
314
315 // Don't provide fix for individual lines when reflow is enabled
316 // Paragraph-based fixes will be handled separately
317 let fix = None;
318
319 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
320
321 // Calculate precise character range for the excess portion
322 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
323
324 warnings.push(LintWarning {
325 rule_name: Some(self.name().to_string()),
326 message,
327 line: start_line,
328 column: start_col,
329 end_line,
330 end_column: end_col,
331 severity: Severity::Warning,
332 fix,
333 });
334 }
335
336 // If reflow is enabled, generate paragraph-based fixes
337 if effective_config.reflow {
338 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
339 // Merge paragraph warnings with line warnings, removing duplicates
340 for pw in paragraph_warnings {
341 // Remove any line warnings that overlap with this paragraph
342 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
343 warnings.push(pw);
344 }
345 }
346
347 Ok(warnings)
348 }
349
350 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
351 // For CLI usage, apply fixes from warnings
352 // LSP will use the warning-based fixes directly
353 let warnings = self.check(ctx)?;
354
355 // If there are no fixes, return content unchanged
356 if !warnings.iter().any(|w| w.fix.is_some()) {
357 return Ok(ctx.content.to_string());
358 }
359
360 // Apply warning-based fixes
361 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
362 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
363 }
364
365 fn as_any(&self) -> &dyn std::any::Any {
366 self
367 }
368
369 fn category(&self) -> RuleCategory {
370 RuleCategory::Whitespace
371 }
372
373 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
374 // Skip if content is empty
375 if ctx.content.is_empty() {
376 return true;
377 }
378
379 // For sentence-per-line or normalize mode, never skip based on line length
380 if self.config.reflow
381 && (self.config.reflow_mode == ReflowMode::SentencePerLine
382 || self.config.reflow_mode == ReflowMode::Normalize)
383 {
384 return false;
385 }
386
387 // Quick check: if total content is shorter than line limit, definitely skip
388 if ctx.content.len() <= self.config.line_length.get() {
389 return true;
390 }
391
392 // Use more efficient check - any() with early termination instead of all()
393 !ctx.lines
394 .iter()
395 .any(|line| line.byte_len > self.config.line_length.get())
396 }
397
398 fn default_config_section(&self) -> Option<(String, toml::Value)> {
399 let default_config = MD013Config::default();
400 let json_value = serde_json::to_value(&default_config).ok()?;
401 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
402
403 if let toml::Value::Table(table) = toml_value {
404 if !table.is_empty() {
405 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
406 } else {
407 None
408 }
409 } else {
410 None
411 }
412 }
413
414 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
415 let mut aliases = std::collections::HashMap::new();
416 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
417 Some(aliases)
418 }
419
420 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
421 where
422 Self: Sized,
423 {
424 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
425 // Use global line_length if rule-specific config still has default value
426 if rule_config.line_length.get() == 80 {
427 rule_config.line_length = config.global.line_length;
428 }
429 Box::new(Self::from_config_struct(rule_config))
430 }
431}
432
433impl MD013LineLength {
434 /// Generate paragraph-based fixes
435 fn generate_paragraph_fixes(
436 &self,
437 ctx: &crate::lint_context::LintContext,
438 config: &MD013Config,
439 lines: &[&str],
440 ) -> Vec<LintWarning> {
441 let mut warnings = Vec::new();
442 let line_index = LineIndex::new(ctx.content);
443
444 let mut i = 0;
445 while i < lines.len() {
446 let line_num = i + 1;
447
448 // Skip special structures
449 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
450 info.in_code_block
451 || info.in_front_matter
452 || info.in_html_block
453 || info.in_html_comment
454 || info.in_esm_block
455 });
456
457 if should_skip_due_to_line_info
458 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
459 || lines[i].trim().starts_with('#')
460 || TableUtils::is_potential_table_row(lines[i])
461 || lines[i].trim().is_empty()
462 || is_horizontal_rule(lines[i].trim())
463 || is_template_directive_only(lines[i])
464 {
465 i += 1;
466 continue;
467 }
468
469 // Helper function to detect semantic line markers
470 let is_semantic_line = |content: &str| -> bool {
471 let trimmed = content.trim_start();
472 let semantic_markers = [
473 "NOTE:",
474 "WARNING:",
475 "IMPORTANT:",
476 "CAUTION:",
477 "TIP:",
478 "DANGER:",
479 "HINT:",
480 "INFO:",
481 ];
482 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
483 };
484
485 // Helper function to detect fence markers (opening or closing)
486 let is_fence_marker = |content: &str| -> bool {
487 let trimmed = content.trim_start();
488 trimmed.starts_with("```") || trimmed.starts_with("~~~")
489 };
490
491 // Check if this is a list item - handle it specially
492 let trimmed = lines[i].trim();
493 if is_list_item(trimmed) {
494 // Collect the entire list item including continuation lines
495 let list_start = i;
496 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
497 let marker_len = marker.len();
498
499 // Track lines and their types (content, code block, fence, nested list)
/// Classification of one collected line of a list item.
#[derive(Clone)]
enum LineType {
    /// Ordinary text content of the item.
    Content(String),
    /// Code line: content and original indent.
    CodeBlock(String, usize),
    /// Nested list item: full line content and original indent.
    NestedListItem(String, usize),
    /// Line starting with NOTE:, WARNING:, etc. that should stay separate.
    SemanticLine(String),
    /// Blank line inside the item.
    Empty,
}
508
509 let mut actual_indent: Option<usize> = None;
510 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
511 i += 1;
512
513 // Collect continuation lines using ctx.lines for metadata
514 while i < lines.len() {
515 let line_info = &ctx.lines[i];
516
517 // Use pre-computed is_blank from ctx
518 if line_info.is_blank {
519 // Empty line - check if next line is indented (part of list item)
520 if i + 1 < lines.len() {
521 let next_info = &ctx.lines[i + 1];
522
523 // Check if next line is indented enough to be continuation
524 if !next_info.is_blank && next_info.indent >= marker_len {
525 // This blank line is between paragraphs/blocks in the list item
526 list_item_lines.push(LineType::Empty);
527 i += 1;
528 continue;
529 }
530 }
531 // No indented line after blank, end of list item
532 break;
533 }
534
535 // Use pre-computed indent from ctx
536 let indent = line_info.indent;
537
538 // Valid continuation must be indented at least marker_len
539 if indent >= marker_len {
540 let trimmed = line_info.content(ctx.content).trim();
541
542 // Use pre-computed in_code_block from ctx
543 if line_info.in_code_block {
544 list_item_lines.push(LineType::CodeBlock(
545 line_info.content(ctx.content)[indent..].to_string(),
546 indent,
547 ));
548 i += 1;
549 continue;
550 }
551
552 // Check if this is a SIBLING list item (breaks parent)
553 // Nested lists are indented >= marker_len and are PART of the parent item
554 // Siblings are at indent < marker_len (at or before parent marker)
555 if is_list_item(trimmed) && indent < marker_len {
556 // This is a sibling item at same or higher level - end parent item
557 break;
558 }
559
560 // Check if this is a NESTED list item marker
561 // Nested lists should be processed separately UNLESS they're part of a
562 // multi-paragraph list item (indicated by a blank line before them OR
563 // it's a continuation of an already-started nested list)
564 if is_list_item(trimmed) && indent >= marker_len {
565 // Check if there was a blank line before this (multi-paragraph context)
566 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
567
568 // Check if we've already seen nested list content (another nested item)
569 let has_nested_content = list_item_lines.iter().any(|line| {
570 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
571 || matches!(line, LineType::NestedListItem(_, _))
572 });
573
574 if !has_blank_before && !has_nested_content {
575 // Single-paragraph context with no prior nested items: starts a new item
576 // End parent collection; nested list will be processed next
577 break;
578 }
579 // else: multi-paragraph context or continuation of nested list, keep collecting
580 // Mark this as a nested list item to preserve its structure
581 list_item_lines.push(LineType::NestedListItem(
582 line_info.content(ctx.content)[indent..].to_string(),
583 indent,
584 ));
585 i += 1;
586 continue;
587 }
588
589 // Normal continuation: marker_len to marker_len+3
590 if indent <= marker_len + 3 {
591 // Set actual_indent from first non-code continuation if not set
592 if actual_indent.is_none() {
593 actual_indent = Some(indent);
594 }
595
596 // Extract content (remove indentation and trailing whitespace)
597 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
598 // See: https://github.com/rvben/rumdl/issues/76
599 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
600
601 // Check if this is a fence marker (opening or closing)
602 // These should be treated as code block lines, not paragraph content
603 if is_fence_marker(&content) {
604 list_item_lines.push(LineType::CodeBlock(content, indent));
605 }
606 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
607 else if is_semantic_line(&content) {
608 list_item_lines.push(LineType::SemanticLine(content));
609 } else {
610 list_item_lines.push(LineType::Content(content));
611 }
612 i += 1;
613 } else {
614 // indent >= marker_len + 4: indented code block
615 list_item_lines.push(LineType::CodeBlock(
616 line_info.content(ctx.content)[indent..].to_string(),
617 indent,
618 ));
619 i += 1;
620 }
621 } else {
622 // Not indented enough, end of list item
623 break;
624 }
625 }
626
627 // Use detected indent or fallback to marker length
628 let indent_size = actual_indent.unwrap_or(marker_len);
629 let expected_indent = " ".repeat(indent_size);
630
631 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
/// A run of consecutive list-item lines that is handled as one unit.
#[derive(Clone)]
enum Block {
    /// Lines of paragraph text.
    Paragraph(Vec<String>),
    /// Code lines.
    Code {
        /// (content, indent) pairs.
        lines: Vec<(String, usize)>,
        /// Whether there was a blank line before this block.
        has_preceding_blank: bool,
    },
    /// (content, indent) pairs for nested list items.
    NestedList(Vec<(String, usize)>),
    /// Semantic marker such as NOTE: or WARNING: that stays on its own line.
    SemanticLine(String),
    /// HTML lines.
    Html {
        /// HTML content preserved exactly as-is.
        lines: Vec<String>,
        /// Whether there was a blank line before this block.
        has_preceding_blank: bool,
    },
}
646
647 // HTML tag detection helpers
648 // Block-level HTML tags that should trigger HTML block detection
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];

/// Returns the lower-cased tag name when `line` begins with a block-level
/// HTML opening tag, or the marker `"!--"` when it begins an HTML comment.
///
/// Closing tags, `<!...>` declarations, tags missing from `BLOCK_LEVEL_TAGS`
/// and tags whose name is not followed by whitespace, `>` or `/` on the same
/// line all yield `None`.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let candidate = line.trim();

    // Comments are tracked under a dedicated pseudo tag name.
    if candidate.starts_with("<!--") {
        return Some("!--".to_string());
    }

    let after_bracket = candidate.strip_prefix('<')?;
    if after_bracket.starts_with('/') || after_bracket.starts_with('!') {
        return None;
    }

    // The name runs up to the first whitespace, '>' or '/'.
    let name_end = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/')?;
    let tag_name = after_bracket[..name_end].to_lowercase();

    BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()).then_some(tag_name)
}
716
/// Reports whether `line` starts with the closing tag for `tag_name`.
///
/// `tag_name` is the value produced when the block was opened: a lower-cased
/// tag name, or `"!--"` for an HTML comment, in which case the line closes the
/// block when it ends with `-->`.
///
/// The tag name is compared as a whole and without regard to ASCII case, so
/// `</pre>` does not close an open `p` and `</DIV>` does close `div`.
/// Whitespace between `</` and the name is tolerated.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Comments have no tag name; they end at the comment terminator.
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let after_slash = match trimmed.strip_prefix("</") {
        Some(rest) => rest.trim_start(),
        None => return false,
    };

    // The name ends at whitespace, '>' or the end of the line; requiring that
    // boundary is what stops a mere prefix from matching.
    let name_end = after_slash
        .find(|c: char| c.is_whitespace() || c == '>')
        .unwrap_or(after_slash.len());

    after_slash[..name_end].eq_ignore_ascii_case(tag_name)
}
730
/// Reports whether `line`, ignoring surrounding whitespace, ends with `/>`.
fn is_self_closing_tag(line: &str) -> bool {
    line.trim().strip_suffix("/>").is_some()
}
735
736 let mut blocks: Vec<Block> = Vec::new();
737 let mut current_paragraph: Vec<String> = Vec::new();
738 let mut current_code_block: Vec<(String, usize)> = Vec::new();
739 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
740 let mut current_html_block: Vec<String> = Vec::new();
741 let mut html_tag_stack: Vec<String> = Vec::new();
742 let mut in_code = false;
743 let mut in_nested_list = false;
744 let mut in_html_block = false;
745 let mut had_preceding_blank = false; // Track if we just saw an empty line
746 let mut code_block_has_preceding_blank = false; // Track blank before current code block
747 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
748
749 for line in &list_item_lines {
750 match line {
751 LineType::Empty => {
752 if in_code {
753 current_code_block.push((String::new(), 0));
754 } else if in_nested_list {
755 current_nested_list.push((String::new(), 0));
756 } else if in_html_block {
757 // Allow blank lines inside HTML blocks
758 current_html_block.push(String::new());
759 } else if !current_paragraph.is_empty() {
760 blocks.push(Block::Paragraph(current_paragraph.clone()));
761 current_paragraph.clear();
762 }
763 // Mark that we saw a blank line
764 had_preceding_blank = true;
765 }
766 LineType::Content(content) => {
767 // Check if we're currently in an HTML block
768 if in_html_block {
769 current_html_block.push(content.clone());
770
771 // Check if this line closes any open HTML tags
772 if let Some(last_tag) = html_tag_stack.last() {
773 if is_html_closing_tag(content, last_tag) {
774 html_tag_stack.pop();
775
776 // If stack is empty, HTML block is complete
777 if html_tag_stack.is_empty() {
778 blocks.push(Block::Html {
779 lines: current_html_block.clone(),
780 has_preceding_blank: html_block_has_preceding_blank,
781 });
782 current_html_block.clear();
783 in_html_block = false;
784 }
785 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
786 // Nested opening tag within HTML block
787 if !is_self_closing_tag(content) {
788 html_tag_stack.push(new_tag);
789 }
790 }
791 }
792 had_preceding_blank = false;
793 } else {
794 // Not in HTML block - check if this line starts one
795 if let Some(tag_name) = is_block_html_opening_tag(content) {
796 // Flush current paragraph before starting HTML block
797 if in_code {
798 blocks.push(Block::Code {
799 lines: current_code_block.clone(),
800 has_preceding_blank: code_block_has_preceding_blank,
801 });
802 current_code_block.clear();
803 in_code = false;
804 } else if in_nested_list {
805 blocks.push(Block::NestedList(current_nested_list.clone()));
806 current_nested_list.clear();
807 in_nested_list = false;
808 } else if !current_paragraph.is_empty() {
809 blocks.push(Block::Paragraph(current_paragraph.clone()));
810 current_paragraph.clear();
811 }
812
813 // Start new HTML block
814 in_html_block = true;
815 html_block_has_preceding_blank = had_preceding_blank;
816 current_html_block.push(content.clone());
817
818 // Check if it's self-closing or needs a closing tag
819 if is_self_closing_tag(content) {
820 // Self-closing tag - complete the HTML block immediately
821 blocks.push(Block::Html {
822 lines: current_html_block.clone(),
823 has_preceding_blank: html_block_has_preceding_blank,
824 });
825 current_html_block.clear();
826 in_html_block = false;
827 } else {
828 // Regular opening tag - push to stack
829 html_tag_stack.push(tag_name);
830 }
831 } else {
832 // Regular content line - add to paragraph
833 if in_code {
834 // Switching from code to content
835 blocks.push(Block::Code {
836 lines: current_code_block.clone(),
837 has_preceding_blank: code_block_has_preceding_blank,
838 });
839 current_code_block.clear();
840 in_code = false;
841 } else if in_nested_list {
842 // Switching from nested list to content
843 blocks.push(Block::NestedList(current_nested_list.clone()));
844 current_nested_list.clear();
845 in_nested_list = false;
846 }
847 current_paragraph.push(content.clone());
848 }
849 had_preceding_blank = false; // Reset after content
850 }
851 }
852 LineType::CodeBlock(content, indent) => {
853 if in_nested_list {
854 // Switching from nested list to code
855 blocks.push(Block::NestedList(current_nested_list.clone()));
856 current_nested_list.clear();
857 in_nested_list = false;
858 } else if in_html_block {
859 // Switching from HTML block to code (shouldn't happen normally, but handle it)
860 blocks.push(Block::Html {
861 lines: current_html_block.clone(),
862 has_preceding_blank: html_block_has_preceding_blank,
863 });
864 current_html_block.clear();
865 html_tag_stack.clear();
866 in_html_block = false;
867 }
868 if !in_code {
869 // Switching from content to code
870 if !current_paragraph.is_empty() {
871 blocks.push(Block::Paragraph(current_paragraph.clone()));
872 current_paragraph.clear();
873 }
874 in_code = true;
875 // Record whether there was a blank line before this code block
876 code_block_has_preceding_blank = had_preceding_blank;
877 }
878 current_code_block.push((content.clone(), *indent));
879 had_preceding_blank = false; // Reset after code
880 }
881 LineType::NestedListItem(content, indent) => {
882 if in_code {
883 // Switching from code to nested list
884 blocks.push(Block::Code {
885 lines: current_code_block.clone(),
886 has_preceding_blank: code_block_has_preceding_blank,
887 });
888 current_code_block.clear();
889 in_code = false;
890 } else if in_html_block {
891 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
892 blocks.push(Block::Html {
893 lines: current_html_block.clone(),
894 has_preceding_blank: html_block_has_preceding_blank,
895 });
896 current_html_block.clear();
897 html_tag_stack.clear();
898 in_html_block = false;
899 }
900 if !in_nested_list {
901 // Switching from content to nested list
902 if !current_paragraph.is_empty() {
903 blocks.push(Block::Paragraph(current_paragraph.clone()));
904 current_paragraph.clear();
905 }
906 in_nested_list = true;
907 }
908 current_nested_list.push((content.clone(), *indent));
909 had_preceding_blank = false; // Reset after nested list
910 }
911 LineType::SemanticLine(content) => {
912 // Semantic lines are standalone - flush any current block and add as separate block
913 if in_code {
914 blocks.push(Block::Code {
915 lines: current_code_block.clone(),
916 has_preceding_blank: code_block_has_preceding_blank,
917 });
918 current_code_block.clear();
919 in_code = false;
920 } else if in_nested_list {
921 blocks.push(Block::NestedList(current_nested_list.clone()));
922 current_nested_list.clear();
923 in_nested_list = false;
924 } else if in_html_block {
925 blocks.push(Block::Html {
926 lines: current_html_block.clone(),
927 has_preceding_blank: html_block_has_preceding_blank,
928 });
929 current_html_block.clear();
930 html_tag_stack.clear();
931 in_html_block = false;
932 } else if !current_paragraph.is_empty() {
933 blocks.push(Block::Paragraph(current_paragraph.clone()));
934 current_paragraph.clear();
935 }
936 // Add semantic line as its own block
937 blocks.push(Block::SemanticLine(content.clone()));
938 had_preceding_blank = false; // Reset after semantic line
939 }
940 }
941 }
942
943 // Push remaining block
944 if in_code && !current_code_block.is_empty() {
945 blocks.push(Block::Code {
946 lines: current_code_block,
947 has_preceding_blank: code_block_has_preceding_blank,
948 });
949 } else if in_nested_list && !current_nested_list.is_empty() {
950 blocks.push(Block::NestedList(current_nested_list));
951 } else if in_html_block && !current_html_block.is_empty() {
952 // If we still have an unclosed HTML block, push it anyway
953 // (malformed HTML - missing closing tag)
954 blocks.push(Block::Html {
955 lines: current_html_block,
956 has_preceding_blank: html_block_has_preceding_blank,
957 });
958 } else if !current_paragraph.is_empty() {
959 blocks.push(Block::Paragraph(current_paragraph));
960 }
961
962 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
963 let content_lines: Vec<String> = list_item_lines
964 .iter()
965 .filter_map(|line| {
966 if let LineType::Content(s) = line {
967 Some(s.clone())
968 } else {
969 None
970 }
971 })
972 .collect();
973
974 // Check if we need to reflow this list item
975 // We check the combined content to see if it exceeds length limits
976 let combined_content = content_lines.join(" ").trim().to_string();
977 let full_line = format!("{marker}{combined_content}");
978
979 // Helper to check if we should reflow in normalize mode
980 let should_normalize = || {
981 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
982 // DO normalize if it has plain text content that spans multiple lines
983 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
984 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
985 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
986 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
987
988 // If we have nested lists, code blocks, or semantic lines but no paragraphs, don't normalize
989 if (has_nested_lists || has_code_blocks || has_semantic_lines) && !has_paragraphs {
990 return false;
991 }
992
993 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
994 if has_paragraphs {
995 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
996 if paragraph_count > 1 {
997 // Multiple paragraph blocks should be normalized
998 return true;
999 }
1000
1001 // Single paragraph block: normalize if it has multiple content lines
1002 if content_lines.len() > 1 {
1003 return true;
1004 }
1005 }
1006
1007 false
1008 };
1009
1010 let needs_reflow = match config.reflow_mode {
1011 ReflowMode::Normalize => {
1012 // Only reflow if:
1013 // 1. The combined line would exceed the limit, OR
1014 // 2. The list item should be normalized (has multi-line plain text)
1015 let combined_length = self.calculate_effective_length(&full_line);
1016 if combined_length > config.line_length.get() {
1017 true
1018 } else {
1019 should_normalize()
1020 }
1021 }
1022 ReflowMode::SentencePerLine => {
1023 // Check if list item has multiple sentences
1024 let sentences = split_into_sentences(&combined_content);
1025 sentences.len() > 1
1026 }
1027 ReflowMode::Default => {
1028 // In default mode, only reflow if lines exceed limit
1029 self.calculate_effective_length(&full_line) > config.line_length.get()
1030 }
1031 };
1032
1033 if needs_reflow {
1034 let start_range = line_index.whole_line_range(list_start + 1);
1035 let end_line = i - 1;
1036 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1037 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1038 } else {
1039 line_index.whole_line_range(end_line + 1)
1040 };
1041 let byte_range = start_range.start..end_range.end;
1042
1043 // Reflow each block (paragraphs only, preserve code blocks)
1044 // When line_length = 0 (no limit), use a very large value for reflow
1045 let reflow_line_length = if config.line_length.is_unlimited() {
1046 usize::MAX
1047 } else {
1048 config.line_length.get().saturating_sub(indent_size).max(1)
1049 };
1050 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1051 line_length: reflow_line_length,
1052 break_on_sentences: true,
1053 preserve_breaks: false,
1054 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1055 abbreviations: config.abbreviations.clone(),
1056 };
1057
1058 let mut result: Vec<String> = Vec::new();
1059 let mut is_first_block = true;
1060
1061 for (block_idx, block) in blocks.iter().enumerate() {
1062 match block {
1063 Block::Paragraph(para_lines) => {
1064 // Split the paragraph into segments at hard break boundaries
1065 // Each segment can be reflowed independently
1066 let segments = split_into_segments(para_lines);
1067
1068 for (segment_idx, segment) in segments.iter().enumerate() {
1069 // Check if this segment ends with a hard break and what type
1070 let hard_break_type = segment.last().and_then(|line| {
1071 let line = line.strip_suffix('\r').unwrap_or(line);
1072 if line.ends_with('\\') {
1073 Some("\\")
1074 } else if line.ends_with(" ") {
1075 Some(" ")
1076 } else {
1077 None
1078 }
1079 });
1080
1081 // Join and reflow the segment (removing the hard break marker for processing)
1082 let segment_for_reflow: Vec<String> = segment
1083 .iter()
1084 .map(|line| {
1085 // Strip hard break marker (2 spaces or backslash) for reflow processing
1086 if line.ends_with('\\') {
1087 line[..line.len() - 1].trim_end().to_string()
1088 } else if line.ends_with(" ") {
1089 line[..line.len() - 2].trim_end().to_string()
1090 } else {
1091 line.clone()
1092 }
1093 })
1094 .collect();
1095
1096 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1097 if !segment_text.is_empty() {
1098 let reflowed =
1099 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1100
1101 if is_first_block && segment_idx == 0 {
1102 // First segment of first block starts with marker
1103 result.push(format!("{marker}{}", reflowed[0]));
1104 for line in reflowed.iter().skip(1) {
1105 result.push(format!("{expected_indent}{line}"));
1106 }
1107 is_first_block = false;
1108 } else {
1109 // Subsequent segments
1110 for line in reflowed {
1111 result.push(format!("{expected_indent}{line}"));
1112 }
1113 }
1114
1115 // If this segment had a hard break, add it back to the last line
1116 // Preserve the original hard break format (backslash or two spaces)
1117 if let Some(break_marker) = hard_break_type
1118 && let Some(last_line) = result.last_mut()
1119 {
1120 last_line.push_str(break_marker);
1121 }
1122 }
1123 }
1124
1125 // Add blank line after paragraph block if there's a next block
1126 // BUT: check if next block is a code block that doesn't want a preceding blank
1127 if block_idx < blocks.len() - 1 {
1128 let next_block = &blocks[block_idx + 1];
1129 let should_add_blank = match next_block {
1130 Block::Code {
1131 has_preceding_blank, ..
1132 } => *has_preceding_blank,
1133 _ => true, // For all other blocks, add blank line
1134 };
1135 if should_add_blank {
1136 result.push(String::new());
1137 }
1138 }
1139 }
1140 Block::Code {
1141 lines: code_lines,
1142 has_preceding_blank: _,
1143 } => {
1144 // Preserve code blocks as-is with original indentation
1145 // NOTE: Blank line before code block is handled by the previous block
1146 // (see paragraph block's logic above)
1147
1148 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1149 if is_first_block && idx == 0 {
1150 // First line of first block gets marker
1151 result.push(format!(
1152 "{marker}{}",
1153 " ".repeat(orig_indent - marker_len) + content
1154 ));
1155 is_first_block = false;
1156 } else if content.is_empty() {
1157 result.push(String::new());
1158 } else {
1159 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1160 }
1161 }
1162 }
1163 Block::NestedList(nested_items) => {
1164 // Preserve nested list items as-is with original indentation
1165 if !is_first_block {
1166 result.push(String::new());
1167 }
1168
1169 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1170 if is_first_block && idx == 0 {
1171 // First line of first block gets marker
1172 result.push(format!(
1173 "{marker}{}",
1174 " ".repeat(orig_indent - marker_len) + content
1175 ));
1176 is_first_block = false;
1177 } else if content.is_empty() {
1178 result.push(String::new());
1179 } else {
1180 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1181 }
1182 }
1183
1184 // Add blank line after nested list if there's a next block
1185 // Check if next block is a code block that doesn't want a preceding blank
1186 if block_idx < blocks.len() - 1 {
1187 let next_block = &blocks[block_idx + 1];
1188 let should_add_blank = match next_block {
1189 Block::Code {
1190 has_preceding_blank, ..
1191 } => *has_preceding_blank,
1192 _ => true, // For all other blocks, add blank line
1193 };
1194 if should_add_blank {
1195 result.push(String::new());
1196 }
1197 }
1198 }
1199 Block::SemanticLine(content) => {
1200 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1201 // Add blank line before if not first block
1202 if !is_first_block {
1203 result.push(String::new());
1204 }
1205
1206 if is_first_block {
1207 // First block starts with marker
1208 result.push(format!("{marker}{content}"));
1209 is_first_block = false;
1210 } else {
1211 // Subsequent blocks use expected indent
1212 result.push(format!("{expected_indent}{content}"));
1213 }
1214
1215 // Add blank line after semantic line if there's a next block
1216 // Check if next block is a code block that doesn't want a preceding blank
1217 if block_idx < blocks.len() - 1 {
1218 let next_block = &blocks[block_idx + 1];
1219 let should_add_blank = match next_block {
1220 Block::Code {
1221 has_preceding_blank, ..
1222 } => *has_preceding_blank,
1223 _ => true, // For all other blocks, add blank line
1224 };
1225 if should_add_blank {
1226 result.push(String::new());
1227 }
1228 }
1229 }
1230 Block::Html {
1231 lines: html_lines,
1232 has_preceding_blank: _,
1233 } => {
1234 // Preserve HTML blocks exactly as-is with original indentation
1235 // NOTE: Blank line before HTML block is handled by the previous block
1236
1237 for (idx, line) in html_lines.iter().enumerate() {
1238 if is_first_block && idx == 0 {
1239 // First line of first block gets marker
1240 result.push(format!("{marker}{line}"));
1241 is_first_block = false;
1242 } else if line.is_empty() {
1243 // Preserve blank lines inside HTML blocks
1244 result.push(String::new());
1245 } else {
1246 // Preserve lines with their original content (already includes indentation)
1247 result.push(format!("{expected_indent}{line}"));
1248 }
1249 }
1250
1251 // Add blank line after HTML block if there's a next block
1252 if block_idx < blocks.len() - 1 {
1253 let next_block = &blocks[block_idx + 1];
1254 let should_add_blank = match next_block {
1255 Block::Code {
1256 has_preceding_blank, ..
1257 } => *has_preceding_blank,
1258 Block::Html {
1259 has_preceding_blank, ..
1260 } => *has_preceding_blank,
1261 _ => true, // For all other blocks, add blank line
1262 };
1263 if should_add_blank {
1264 result.push(String::new());
1265 }
1266 }
1267 }
1268 }
1269 }
1270
1271 let reflowed_text = result.join("\n");
1272
1273 // Preserve trailing newline
1274 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1275 format!("{reflowed_text}\n")
1276 } else {
1277 reflowed_text
1278 };
1279
1280 // Get the original text to compare
1281 let original_text = &ctx.content[byte_range.clone()];
1282
1283 // Only generate a warning if the replacement is different from the original
1284 if original_text != replacement {
1285 // Generate an appropriate message based on why reflow is needed
1286 let message = match config.reflow_mode {
1287 ReflowMode::SentencePerLine => {
1288 let num_sentences = split_into_sentences(&combined_content).len();
1289 let num_lines = content_lines.len();
1290 if num_lines == 1 {
1291 // Single line with multiple sentences
1292 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1293 } else {
1294 // Multiple lines - could be split sentences or mixed
1295 format!(
1296 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1297 )
1298 }
1299 }
1300 ReflowMode::Normalize => {
1301 let combined_length = self.calculate_effective_length(&full_line);
1302 if combined_length > config.line_length.get() {
1303 format!(
1304 "Line length {} exceeds {} characters",
1305 combined_length,
1306 config.line_length.get()
1307 )
1308 } else {
1309 "Multi-line content can be normalized".to_string()
1310 }
1311 }
1312 ReflowMode::Default => {
1313 let combined_length = self.calculate_effective_length(&full_line);
1314 format!(
1315 "Line length {} exceeds {} characters",
1316 combined_length,
1317 config.line_length.get()
1318 )
1319 }
1320 };
1321
1322 warnings.push(LintWarning {
1323 rule_name: Some(self.name().to_string()),
1324 message,
1325 line: list_start + 1,
1326 column: 1,
1327 end_line: end_line + 1,
1328 end_column: lines[end_line].len() + 1,
1329 severity: Severity::Warning,
1330 fix: Some(crate::rule::Fix {
1331 range: byte_range,
1332 replacement,
1333 }),
1334 });
1335 }
1336 }
1337 continue;
1338 }
1339
1340 // Found start of a paragraph - collect all lines in it
1341 let paragraph_start = i;
1342 let mut paragraph_lines = vec![lines[i]];
1343 i += 1;
1344
1345 while i < lines.len() {
1346 let next_line = lines[i];
1347 let next_line_num = i + 1;
1348 let next_trimmed = next_line.trim();
1349
1350 // Stop at paragraph boundaries
1351 if next_trimmed.is_empty()
1352 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1353 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1354 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1355 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1356 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1357 || (next_line_num > 0
1358 && next_line_num <= ctx.lines.len()
1359 && ctx.lines[next_line_num - 1].blockquote.is_some())
1360 || next_trimmed.starts_with('#')
1361 || TableUtils::is_potential_table_row(next_line)
1362 || is_list_item(next_trimmed)
1363 || is_horizontal_rule(next_trimmed)
1364 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1365 || is_template_directive_only(next_line)
1366 {
1367 break;
1368 }
1369
1370 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1371 if i > 0 && has_hard_break(lines[i - 1]) {
1372 // Don't include lines after hard breaks in the same paragraph
1373 break;
1374 }
1375
1376 paragraph_lines.push(next_line);
1377 i += 1;
1378 }
1379
1380 // Combine paragraph lines into a single string for processing
1381 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1382 let paragraph_text = paragraph_lines.join(" ");
1383
1384 // Skip reflowing if this paragraph contains definition list items
1385 // Definition lists are multi-line structures that should not be joined
1386 let contains_definition_list = paragraph_lines
1387 .iter()
1388 .any(|line| crate::utils::is_definition_list_item(line));
1389
1390 if contains_definition_list {
1391 // Don't reflow definition lists - skip this paragraph
1392 i = paragraph_start + paragraph_lines.len();
1393 continue;
1394 }
1395
1396 // Check if this paragraph needs reflowing
1397 let needs_reflow = match config.reflow_mode {
1398 ReflowMode::Normalize => {
1399 // In normalize mode, reflow multi-line paragraphs
1400 paragraph_lines.len() > 1
1401 }
1402 ReflowMode::SentencePerLine => {
1403 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1404 // Note: we check the joined text because sentences can span multiple lines
1405 let sentences = split_into_sentences(¶graph_text);
1406
1407 // Always reflow if multiple sentences on one line
1408 if sentences.len() > 1 {
1409 true
1410 } else if paragraph_lines.len() > 1 {
1411 // For single-sentence paragraphs spanning multiple lines:
1412 // Reflow if they COULD fit on one line (respecting line-length constraint)
1413 if config.line_length.is_unlimited() {
1414 // No line-length constraint - always join single sentences
1415 true
1416 } else {
1417 // Only join if it fits within line-length
1418 let effective_length = self.calculate_effective_length(¶graph_text);
1419 effective_length <= config.line_length.get()
1420 }
1421 } else {
1422 false
1423 }
1424 }
1425 ReflowMode::Default => {
1426 // In default mode, only reflow if lines exceed limit
1427 paragraph_lines
1428 .iter()
1429 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1430 }
1431 };
1432
1433 if needs_reflow {
1434 // Calculate byte range for this paragraph
1435 // Use whole_line_range for each line and combine
1436 let start_range = line_index.whole_line_range(paragraph_start + 1);
1437 let end_line = paragraph_start + paragraph_lines.len() - 1;
1438
1439 // For the last line, we want to preserve any trailing newline
1440 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1441 // Last line without trailing newline - use line_text_range
1442 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1443 } else {
1444 // Not the last line or has trailing newline - use whole_line_range
1445 line_index.whole_line_range(end_line + 1)
1446 };
1447
1448 let byte_range = start_range.start..end_range.end;
1449
1450 // Check if the paragraph ends with a hard break and what type
1451 let hard_break_type = paragraph_lines.last().and_then(|line| {
1452 let line = line.strip_suffix('\r').unwrap_or(line);
1453 if line.ends_with('\\') {
1454 Some("\\")
1455 } else if line.ends_with(" ") {
1456 Some(" ")
1457 } else {
1458 None
1459 }
1460 });
1461
1462 // Reflow the paragraph
1463 // When line_length = 0 (no limit), use a very large value for reflow
1464 let reflow_line_length = if config.line_length.is_unlimited() {
1465 usize::MAX
1466 } else {
1467 config.line_length.get()
1468 };
1469 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1470 line_length: reflow_line_length,
1471 break_on_sentences: true,
1472 preserve_breaks: false,
1473 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1474 abbreviations: config.abbreviations.clone(),
1475 };
1476 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1477
1478 // If the original paragraph ended with a hard break, preserve it
1479 // Preserve the original hard break format (backslash or two spaces)
1480 if let Some(break_marker) = hard_break_type
1481 && !reflowed.is_empty()
1482 {
1483 let last_idx = reflowed.len() - 1;
1484 if !has_hard_break(&reflowed[last_idx]) {
1485 reflowed[last_idx].push_str(break_marker);
1486 }
1487 }
1488
1489 let reflowed_text = reflowed.join("\n");
1490
1491 // Preserve trailing newline if the original paragraph had one
1492 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1493 format!("{reflowed_text}\n")
1494 } else {
1495 reflowed_text
1496 };
1497
1498 // Get the original text to compare
1499 let original_text = &ctx.content[byte_range.clone()];
1500
1501 // Only generate a warning if the replacement is different from the original
1502 if original_text != replacement {
1503 // Create warning with actual fix
1504 // In default mode, report the specific line that violates
1505 // In normalize mode, report the whole paragraph
1506 // In sentence-per-line mode, report the entire paragraph
1507 let (warning_line, warning_end_line) = match config.reflow_mode {
1508 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1509 ReflowMode::SentencePerLine => {
1510 // Highlight the entire paragraph that needs reformatting
1511 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1512 }
1513 ReflowMode::Default => {
1514 // Find the first line that exceeds the limit
1515 let mut violating_line = paragraph_start;
1516 for (idx, line) in paragraph_lines.iter().enumerate() {
1517 if self.calculate_effective_length(line) > config.line_length.get() {
1518 violating_line = paragraph_start + idx;
1519 break;
1520 }
1521 }
1522 (violating_line + 1, violating_line + 1)
1523 }
1524 };
1525
1526 warnings.push(LintWarning {
1527 rule_name: Some(self.name().to_string()),
1528 message: match config.reflow_mode {
1529 ReflowMode::Normalize => format!(
1530 "Paragraph could be normalized to use line length of {} characters",
1531 config.line_length.get()
1532 ),
1533 ReflowMode::SentencePerLine => {
1534 let num_sentences = split_into_sentences(¶graph_text).len();
1535 if paragraph_lines.len() == 1 {
1536 // Single line with multiple sentences
1537 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1538 } else {
1539 let num_lines = paragraph_lines.len();
1540 // Multiple lines - could be split sentences or mixed
1541 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1542 }
1543 },
1544 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1545 },
1546 line: warning_line,
1547 column: 1,
1548 end_line: warning_end_line,
1549 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1550 severity: Severity::Warning,
1551 fix: Some(crate::rule::Fix {
1552 range: byte_range,
1553 replacement,
1554 }),
1555 });
1556 }
1557 }
1558 }
1559
1560 warnings
1561 }
1562
1563 /// Calculate string length based on the configured length mode
1564 fn calculate_string_length(&self, s: &str) -> usize {
1565 match self.config.length_mode {
1566 LengthMode::Chars => s.chars().count(),
1567 LengthMode::Visual => s.width(),
1568 LengthMode::Bytes => s.len(),
1569 }
1570 }
1571
1572 /// Calculate effective line length excluding unbreakable URLs
1573 fn calculate_effective_length(&self, line: &str) -> usize {
1574 if self.config.strict {
1575 // In strict mode, count everything
1576 return self.calculate_string_length(line);
1577 }
1578
1579 // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1580 let bytes = line.as_bytes();
1581 if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1582 return self.calculate_string_length(line);
1583 }
1584
1585 // More precise check for URLs and links
1586 if !line.contains("http") && !line.contains('[') {
1587 return self.calculate_string_length(line);
1588 }
1589
1590 let mut effective_line = line.to_string();
1591
1592 // First handle markdown links to avoid double-counting URLs
1593 // Pattern: [text](very-long-url) -> [text](url)
1594 if line.contains('[') && line.contains("](") {
1595 for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1596 if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1597 && url.as_str().len() > 15
1598 {
1599 let replacement = format!("[{}](url)", text.as_str());
1600 effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1601 }
1602 }
1603 }
1604
1605 // Then replace bare URLs with a placeholder of reasonable length
1606 // This allows lines with long URLs to pass if the rest of the content is reasonable
1607 if effective_line.contains("http") {
1608 for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1609 let url = url_match.as_str();
1610 // Skip if this URL is already part of a markdown link we handled
1611 if !effective_line.contains(&format!("({url})")) {
1612 // Replace URL with placeholder that represents a "reasonable" URL length
1613 // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1614 let placeholder = "x".repeat(15.min(url.len()));
1615 effective_line = effective_line.replacen(url, &placeholder, 1);
1616 }
1617 }
1618 }
1619
1620 self.calculate_string_length(&effective_line)
1621 }
1622}