rumdl_lib/rules/md013_line_length/mod.rs
//! Rule MD013: Line length
//!
//! See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::range_utils::LineIndex;
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9 IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19 split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
/// Lint rule MD013: reports lines whose length exceeds the configured limit.
///
/// The rule carries no state of its own beyond its [`MD013Config`]; every
/// behaviour switch read by the rule lives in that configuration value.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Configuration this rule instance was built with.
    pub(crate) config: MD013Config,
}
32
33impl MD013LineLength {
34 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35 Self {
36 config: MD013Config {
37 line_length: crate::types::LineLength::new(line_length),
38 code_blocks,
39 tables,
40 headings,
41 paragraphs: true, // Default to true for backwards compatibility
42 strict,
43 reflow: false,
44 reflow_mode: ReflowMode::default(),
45 length_mode: LengthMode::default(),
46 abbreviations: None,
47 },
48 }
49 }
50
51 pub fn from_config_struct(config: MD013Config) -> Self {
52 Self { config }
53 }
54
55 fn should_ignore_line(
56 &self,
57 line: &str,
58 _lines: &[&str],
59 current_line: usize,
60 ctx: &crate::lint_context::LintContext,
61 ) -> bool {
62 if self.config.strict {
63 return false;
64 }
65
66 // Quick check for common patterns before expensive regex
67 let trimmed = line.trim();
68
69 // Only skip if the entire line is a URL (quick check first)
70 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71 return true;
72 }
73
74 // Only skip if the entire line is an image reference (quick check first)
75 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76 return true;
77 }
78
79 // Only skip if the entire line is a link reference (quick check first)
80 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81 return true;
82 }
83
84 // Code blocks with long strings (only check if in code block)
85 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86 && !trimmed.is_empty()
87 && !line.contains(' ')
88 && !line.contains('\t')
89 {
90 return true;
91 }
92
93 false
94 }
95}
96
97impl Rule for MD013LineLength {
98 fn name(&self) -> &'static str {
99 "MD013"
100 }
101
102 fn description(&self) -> &'static str {
103 "Line length should not be excessive"
104 }
105
106 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
107 let content = ctx.content;
108
109 // Fast early return using should_skip
110 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
111 if self.should_skip(ctx)
112 && !(self.config.reflow
113 && (self.config.reflow_mode == ReflowMode::Normalize
114 || self.config.reflow_mode == ReflowMode::SentencePerLine))
115 {
116 return Ok(Vec::new());
117 }
118
119 // Direct implementation without DocumentStructure
120 let mut warnings = Vec::new();
121
122 // Check for inline configuration overrides
123 let inline_config = crate::inline_config::InlineConfig::from_content(content);
124 let config_override = inline_config.get_rule_config("MD013");
125
126 // Apply configuration override if present
127 let effective_config = if let Some(json_config) = config_override {
128 if let Some(obj) = json_config.as_object() {
129 let mut config = self.config.clone();
130 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
131 config.line_length = crate::types::LineLength::new(line_length as usize);
132 }
133 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
134 config.code_blocks = code_blocks;
135 }
136 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
137 config.tables = tables;
138 }
139 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
140 config.headings = headings;
141 }
142 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
143 config.strict = strict;
144 }
145 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
146 config.reflow = reflow;
147 }
148 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
149 config.reflow_mode = match reflow_mode {
150 "default" => ReflowMode::Default,
151 "normalize" => ReflowMode::Normalize,
152 "sentence-per-line" => ReflowMode::SentencePerLine,
153 _ => ReflowMode::default(),
154 };
155 }
156 config
157 } else {
158 self.config.clone()
159 }
160 } else {
161 self.config.clone()
162 };
163
164 // Special handling: line_length = 0 means "no line length limit"
165 // Skip all line length checks, but still allow reflow if enabled
166 let skip_length_checks = effective_config.line_length.is_unlimited();
167
168 // Pre-filter lines that could be problematic to avoid processing all lines
169 let mut candidate_lines = Vec::new();
170 if !skip_length_checks {
171 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
172 // Skip front matter - it should never be linted
173 if line_info.in_front_matter {
174 continue;
175 }
176
177 // Quick length check first
178 if line_info.byte_len > effective_config.line_length.get() {
179 candidate_lines.push(line_idx);
180 }
181 }
182 }
183
184 // If no candidate lines and not in normalize or sentence-per-line mode, early return
185 if candidate_lines.is_empty()
186 && !(effective_config.reflow
187 && (effective_config.reflow_mode == ReflowMode::Normalize
188 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
189 {
190 return Ok(warnings);
191 }
192
193 // Use ctx.lines if available for better performance
194 let lines: Vec<&str> = if !ctx.lines.is_empty() {
195 ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
196 } else {
197 content.lines().collect()
198 };
199
200 // Create a quick lookup set for heading lines
201 // We need this for both the heading skip check AND the paragraphs check
202 let heading_lines_set: std::collections::HashSet<usize> = ctx
203 .lines
204 .iter()
205 .enumerate()
206 .filter(|(_, line)| line.heading.is_some())
207 .map(|(idx, _)| idx + 1)
208 .collect();
209
210 // Use pre-computed table blocks from context
211 // We need this for both the table skip check AND the paragraphs check
212 let table_blocks = &ctx.table_blocks;
213 let mut table_lines_set = std::collections::HashSet::new();
214 for table in table_blocks {
215 table_lines_set.insert(table.header_line + 1);
216 table_lines_set.insert(table.delimiter_line + 1);
217 for &line in &table.content_lines {
218 table_lines_set.insert(line + 1);
219 }
220 }
221
222 // Process candidate lines for line length checks
223 for &line_idx in &candidate_lines {
224 let line_number = line_idx + 1;
225 let line = lines[line_idx];
226
227 // Calculate effective length excluding unbreakable URLs
228 let effective_length = self.calculate_effective_length(line);
229
230 // Use single line length limit for all content
231 let line_limit = effective_config.line_length.get();
232
233 // Skip short lines immediately (double-check after effective length calculation)
234 if effective_length <= line_limit {
235 continue;
236 }
237
238 // Skip mkdocstrings blocks (already handled by LintContext)
239 if ctx.lines[line_idx].in_mkdocstrings {
240 continue;
241 }
242
243 // Skip various block types efficiently
244 if !effective_config.strict {
245 // Skip setext heading underlines
246 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
247 continue;
248 }
249
250 // Skip block elements according to config flags
251 // The flags mean: true = check these elements, false = skip these elements
252 // So we skip when the flag is FALSE and the line is in that element type
253 if (!effective_config.headings && heading_lines_set.contains(&line_number))
254 || (!effective_config.code_blocks
255 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
256 || (!effective_config.tables && table_lines_set.contains(&line_number))
257 || ctx.lines[line_number - 1].blockquote.is_some()
258 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
259 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
260 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
261 {
262 continue;
263 }
264
265 // Check if this is a paragraph/regular text line
266 // If paragraphs = false, skip lines that are NOT in special blocks
267 if !effective_config.paragraphs {
268 let is_special_block = heading_lines_set.contains(&line_number)
269 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
270 || table_lines_set.contains(&line_number)
271 || ctx.lines[line_number - 1].blockquote.is_some()
272 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
273 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
274 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block);
275
276 // Skip regular paragraph text when paragraphs = false
277 if !is_special_block {
278 continue;
279 }
280 }
281
282 // Skip lines that are only a URL, image ref, or link ref
283 if self.should_ignore_line(line, &lines, line_idx, ctx) {
284 continue;
285 }
286 }
287
288 // In sentence-per-line mode, check if this is a single long sentence
289 // If so, emit a warning without a fix (user must manually rephrase)
290 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
291 let sentences = split_into_sentences(line.trim());
292 if sentences.len() == 1 {
293 // Single sentence that's too long - warn but don't auto-fix
294 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
295
296 let (start_line, start_col, end_line, end_col) =
297 calculate_excess_range(line_number, line, line_limit);
298
299 warnings.push(LintWarning {
300 rule_name: Some(self.name().to_string()),
301 message,
302 line: start_line,
303 column: start_col,
304 end_line,
305 end_column: end_col,
306 severity: Severity::Warning,
307 fix: None, // No auto-fix for long single sentences
308 });
309 continue;
310 }
311 // Multiple sentences will be handled by paragraph-based reflow
312 continue;
313 }
314
315 // Don't provide fix for individual lines when reflow is enabled
316 // Paragraph-based fixes will be handled separately
317 let fix = None;
318
319 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
320
321 // Calculate precise character range for the excess portion
322 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
323
324 warnings.push(LintWarning {
325 rule_name: Some(self.name().to_string()),
326 message,
327 line: start_line,
328 column: start_col,
329 end_line,
330 end_column: end_col,
331 severity: Severity::Warning,
332 fix,
333 });
334 }
335
336 // If reflow is enabled, generate paragraph-based fixes
337 if effective_config.reflow {
338 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
339 // Merge paragraph warnings with line warnings, removing duplicates
340 for pw in paragraph_warnings {
341 // Remove any line warnings that overlap with this paragraph
342 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
343 warnings.push(pw);
344 }
345 }
346
347 Ok(warnings)
348 }
349
350 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
351 // For CLI usage, apply fixes from warnings
352 // LSP will use the warning-based fixes directly
353 let warnings = self.check(ctx)?;
354
355 // If there are no fixes, return content unchanged
356 if !warnings.iter().any(|w| w.fix.is_some()) {
357 return Ok(ctx.content.to_string());
358 }
359
360 // Apply warning-based fixes
361 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
362 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
363 }
364
    /// Returns this rule as a type-erased `&dyn Any` reference.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
368
    /// Reports this rule's category, `RuleCategory::Whitespace`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
372
373 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
374 // Skip if content is empty
375 if ctx.content.is_empty() {
376 return true;
377 }
378
379 // For sentence-per-line or normalize mode, never skip based on line length
380 if self.config.reflow
381 && (self.config.reflow_mode == ReflowMode::SentencePerLine
382 || self.config.reflow_mode == ReflowMode::Normalize)
383 {
384 return false;
385 }
386
387 // Quick check: if total content is shorter than line limit, definitely skip
388 if ctx.content.len() <= self.config.line_length.get() {
389 return true;
390 }
391
392 // Use more efficient check - any() with early termination instead of all()
393 !ctx.lines
394 .iter()
395 .any(|line| line.byte_len > self.config.line_length.get())
396 }
397
398 fn default_config_section(&self) -> Option<(String, toml::Value)> {
399 let default_config = MD013Config::default();
400 let json_value = serde_json::to_value(&default_config).ok()?;
401 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
402
403 if let toml::Value::Table(table) = toml_value {
404 if !table.is_empty() {
405 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
406 } else {
407 None
408 }
409 } else {
410 None
411 }
412 }
413
414 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
415 let mut aliases = std::collections::HashMap::new();
416 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
417 Some(aliases)
418 }
419
420 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
421 where
422 Self: Sized,
423 {
424 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
425 // Use global line_length if rule-specific config still has default value
426 if rule_config.line_length.get() == 80 {
427 rule_config.line_length = config.global.line_length;
428 }
429 Box::new(Self::from_config_struct(rule_config))
430 }
431}
432
433impl MD013LineLength {
434 /// Generate paragraph-based fixes
435 fn generate_paragraph_fixes(
436 &self,
437 ctx: &crate::lint_context::LintContext,
438 config: &MD013Config,
439 lines: &[&str],
440 ) -> Vec<LintWarning> {
441 let mut warnings = Vec::new();
442 let line_index = LineIndex::new(ctx.content);
443
444 let mut i = 0;
445 while i < lines.len() {
446 let line_num = i + 1;
447
448 // Skip special structures
449 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
450 info.in_code_block
451 || info.in_front_matter
452 || info.in_html_block
453 || info.in_html_comment
454 || info.in_esm_block
455 });
456
457 if should_skip_due_to_line_info
458 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
459 || lines[i].trim().starts_with('#')
460 || TableUtils::is_potential_table_row(lines[i])
461 || lines[i].trim().is_empty()
462 || is_horizontal_rule(lines[i].trim())
463 || is_template_directive_only(lines[i])
464 {
465 i += 1;
466 continue;
467 }
468
469 // Helper function to detect semantic line markers
470 let is_semantic_line = |content: &str| -> bool {
471 let trimmed = content.trim_start();
472 let semantic_markers = [
473 "NOTE:",
474 "WARNING:",
475 "IMPORTANT:",
476 "CAUTION:",
477 "TIP:",
478 "DANGER:",
479 "HINT:",
480 "INFO:",
481 ];
482 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
483 };
484
485 // Helper function to detect fence markers (opening or closing)
486 let is_fence_marker = |content: &str| -> bool {
487 let trimmed = content.trim_start();
488 trimmed.starts_with("```") || trimmed.starts_with("~~~")
489 };
490
491 // Check if this is a list item - handle it specially
492 let trimmed = lines[i].trim();
493 if is_list_item(trimmed) {
494 // Collect the entire list item including continuation lines
495 let list_start = i;
496 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
497 let marker_len = marker.len();
498
499 // Track lines and their types (content, code block, fence, nested list)
500 #[derive(Clone)]
501 enum LineType {
502 Content(String),
503 CodeBlock(String, usize), // content and original indent
504 NestedListItem(String, usize), // full line content and original indent
505 SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
506 Empty,
507 }
508
509 let mut actual_indent: Option<usize> = None;
510 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
511 i += 1;
512
513 // Collect continuation lines using ctx.lines for metadata
514 while i < lines.len() {
515 let line_info = &ctx.lines[i];
516
517 // Use pre-computed is_blank from ctx
518 if line_info.is_blank {
519 // Empty line - check if next line is indented (part of list item)
520 if i + 1 < lines.len() {
521 let next_info = &ctx.lines[i + 1];
522
523 // Check if next line is indented enough to be continuation
524 if !next_info.is_blank && next_info.indent >= marker_len {
525 // This blank line is between paragraphs/blocks in the list item
526 list_item_lines.push(LineType::Empty);
527 i += 1;
528 continue;
529 }
530 }
531 // No indented line after blank, end of list item
532 break;
533 }
534
535 // Use pre-computed indent from ctx
536 let indent = line_info.indent;
537
538 // Valid continuation must be indented at least marker_len
539 if indent >= marker_len {
540 let trimmed = line_info.content(ctx.content).trim();
541
542 // Use pre-computed in_code_block from ctx
543 if line_info.in_code_block {
544 list_item_lines.push(LineType::CodeBlock(
545 line_info.content(ctx.content)[indent..].to_string(),
546 indent,
547 ));
548 i += 1;
549 continue;
550 }
551
552 // Check if this is a SIBLING list item (breaks parent)
553 // Nested lists are indented >= marker_len and are PART of the parent item
554 // Siblings are at indent < marker_len (at or before parent marker)
555 if is_list_item(trimmed) && indent < marker_len {
556 // This is a sibling item at same or higher level - end parent item
557 break;
558 }
559
560 // Check if this is a NESTED list item marker
561 // Nested lists should be processed separately UNLESS they're part of a
562 // multi-paragraph list item (indicated by a blank line before them OR
563 // it's a continuation of an already-started nested list)
564 if is_list_item(trimmed) && indent >= marker_len {
565 // Check if there was a blank line before this (multi-paragraph context)
566 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
567
568 // Check if we've already seen nested list content (another nested item)
569 let has_nested_content = list_item_lines.iter().any(|line| {
570 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
571 || matches!(line, LineType::NestedListItem(_, _))
572 });
573
574 if !has_blank_before && !has_nested_content {
575 // Single-paragraph context with no prior nested items: starts a new item
576 // End parent collection; nested list will be processed next
577 break;
578 }
579 // else: multi-paragraph context or continuation of nested list, keep collecting
580 // Mark this as a nested list item to preserve its structure
581 list_item_lines.push(LineType::NestedListItem(
582 line_info.content(ctx.content)[indent..].to_string(),
583 indent,
584 ));
585 i += 1;
586 continue;
587 }
588
589 // Normal continuation: marker_len to marker_len+3
590 if indent <= marker_len + 3 {
591 // Set actual_indent from first non-code continuation if not set
592 if actual_indent.is_none() {
593 actual_indent = Some(indent);
594 }
595
596 // Extract content (remove indentation and trailing whitespace)
597 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
598 // See: https://github.com/rvben/rumdl/issues/76
599 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
600
601 // Check if this is a fence marker (opening or closing)
602 // These should be treated as code block lines, not paragraph content
603 if is_fence_marker(&content) {
604 list_item_lines.push(LineType::CodeBlock(content, indent));
605 }
606 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
607 else if is_semantic_line(&content) {
608 list_item_lines.push(LineType::SemanticLine(content));
609 } else {
610 list_item_lines.push(LineType::Content(content));
611 }
612 i += 1;
613 } else {
614 // indent >= marker_len + 4: indented code block
615 list_item_lines.push(LineType::CodeBlock(
616 line_info.content(ctx.content)[indent..].to_string(),
617 indent,
618 ));
619 i += 1;
620 }
621 } else {
622 // Not indented enough, end of list item
623 break;
624 }
625 }
626
627 // Use detected indent or fallback to marker length
628 let indent_size = actual_indent.unwrap_or(marker_len);
629 let expected_indent = " ".repeat(indent_size);
630
631 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
632 #[derive(Clone)]
633 enum Block {
634 Paragraph(Vec<String>),
635 Code {
636 lines: Vec<(String, usize)>, // (content, indent) pairs
637 has_preceding_blank: bool, // Whether there was a blank line before this block
638 },
639 NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
640 SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
641 Html {
642 lines: Vec<String>, // HTML content preserved exactly as-is
643 has_preceding_blank: bool, // Whether there was a blank line before this block
644 },
645 }
646
647 // HTML tag detection helpers
648 // Block-level HTML tags that should trigger HTML block detection
649 const BLOCK_LEVEL_TAGS: &[&str] = &[
650 "div",
651 "details",
652 "summary",
653 "section",
654 "article",
655 "header",
656 "footer",
657 "nav",
658 "aside",
659 "main",
660 "table",
661 "thead",
662 "tbody",
663 "tfoot",
664 "tr",
665 "td",
666 "th",
667 "ul",
668 "ol",
669 "li",
670 "dl",
671 "dt",
672 "dd",
673 "pre",
674 "blockquote",
675 "figure",
676 "figcaption",
677 "form",
678 "fieldset",
679 "legend",
680 "hr",
681 "p",
682 "h1",
683 "h2",
684 "h3",
685 "h4",
686 "h5",
687 "h6",
688 "style",
689 "script",
690 "noscript",
691 ];
692
693 fn is_block_html_opening_tag(line: &str) -> Option<String> {
694 let trimmed = line.trim();
695
696 // Check for HTML comments
697 if trimmed.starts_with("<!--") {
698 return Some("!--".to_string());
699 }
700
701 // Check for opening tags
702 if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
703 // Extract tag name from <tagname ...> or <tagname>
704 let after_bracket = &trimmed[1..];
705 if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
706 let tag_name = after_bracket[..end].to_lowercase();
707
708 // Only treat as block if it's a known block-level tag
709 if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
710 return Some(tag_name);
711 }
712 }
713 }
714 None
715 }
716
717 fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
718 let trimmed = line.trim();
719
720 // Special handling for HTML comments
721 if tag_name == "!--" {
722 return trimmed.ends_with("-->");
723 }
724
725 // Check for closing tags: </tagname> or </tagname ...>
726 trimmed.starts_with(&format!("</{tag_name}>"))
727 || trimmed.starts_with(&format!("</{tag_name} "))
728 || (trimmed.starts_with("</") && trimmed[2..].trim_start().starts_with(tag_name))
729 }
730
731 fn is_self_closing_tag(line: &str) -> bool {
732 let trimmed = line.trim();
733 trimmed.ends_with("/>")
734 }
735
736 let mut blocks: Vec<Block> = Vec::new();
737 let mut current_paragraph: Vec<String> = Vec::new();
738 let mut current_code_block: Vec<(String, usize)> = Vec::new();
739 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
740 let mut current_html_block: Vec<String> = Vec::new();
741 let mut html_tag_stack: Vec<String> = Vec::new();
742 let mut in_code = false;
743 let mut in_nested_list = false;
744 let mut in_html_block = false;
745 let mut had_preceding_blank = false; // Track if we just saw an empty line
746 let mut code_block_has_preceding_blank = false; // Track blank before current code block
747 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
748
749 for line in &list_item_lines {
750 match line {
751 LineType::Empty => {
752 if in_code {
753 current_code_block.push((String::new(), 0));
754 } else if in_nested_list {
755 current_nested_list.push((String::new(), 0));
756 } else if in_html_block {
757 // Allow blank lines inside HTML blocks
758 current_html_block.push(String::new());
759 } else if !current_paragraph.is_empty() {
760 blocks.push(Block::Paragraph(current_paragraph.clone()));
761 current_paragraph.clear();
762 }
763 // Mark that we saw a blank line
764 had_preceding_blank = true;
765 }
766 LineType::Content(content) => {
767 // Check if we're currently in an HTML block
768 if in_html_block {
769 current_html_block.push(content.clone());
770
771 // Check if this line closes any open HTML tags
772 if let Some(last_tag) = html_tag_stack.last() {
773 if is_html_closing_tag(content, last_tag) {
774 html_tag_stack.pop();
775
776 // If stack is empty, HTML block is complete
777 if html_tag_stack.is_empty() {
778 blocks.push(Block::Html {
779 lines: current_html_block.clone(),
780 has_preceding_blank: html_block_has_preceding_blank,
781 });
782 current_html_block.clear();
783 in_html_block = false;
784 }
785 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
786 // Nested opening tag within HTML block
787 if !is_self_closing_tag(content) {
788 html_tag_stack.push(new_tag);
789 }
790 }
791 }
792 had_preceding_blank = false;
793 } else {
794 // Not in HTML block - check if this line starts one
795 if let Some(tag_name) = is_block_html_opening_tag(content) {
796 // Flush current paragraph before starting HTML block
797 if in_code {
798 blocks.push(Block::Code {
799 lines: current_code_block.clone(),
800 has_preceding_blank: code_block_has_preceding_blank,
801 });
802 current_code_block.clear();
803 in_code = false;
804 } else if in_nested_list {
805 blocks.push(Block::NestedList(current_nested_list.clone()));
806 current_nested_list.clear();
807 in_nested_list = false;
808 } else if !current_paragraph.is_empty() {
809 blocks.push(Block::Paragraph(current_paragraph.clone()));
810 current_paragraph.clear();
811 }
812
813 // Start new HTML block
814 in_html_block = true;
815 html_block_has_preceding_blank = had_preceding_blank;
816 current_html_block.push(content.clone());
817
818 // Check if it's self-closing or needs a closing tag
819 if is_self_closing_tag(content) {
820 // Self-closing tag - complete the HTML block immediately
821 blocks.push(Block::Html {
822 lines: current_html_block.clone(),
823 has_preceding_blank: html_block_has_preceding_blank,
824 });
825 current_html_block.clear();
826 in_html_block = false;
827 } else {
828 // Regular opening tag - push to stack
829 html_tag_stack.push(tag_name);
830 }
831 } else {
832 // Regular content line - add to paragraph
833 if in_code {
834 // Switching from code to content
835 blocks.push(Block::Code {
836 lines: current_code_block.clone(),
837 has_preceding_blank: code_block_has_preceding_blank,
838 });
839 current_code_block.clear();
840 in_code = false;
841 } else if in_nested_list {
842 // Switching from nested list to content
843 blocks.push(Block::NestedList(current_nested_list.clone()));
844 current_nested_list.clear();
845 in_nested_list = false;
846 }
847 current_paragraph.push(content.clone());
848 }
849 had_preceding_blank = false; // Reset after content
850 }
851 }
852 LineType::CodeBlock(content, indent) => {
853 if in_nested_list {
854 // Switching from nested list to code
855 blocks.push(Block::NestedList(current_nested_list.clone()));
856 current_nested_list.clear();
857 in_nested_list = false;
858 } else if in_html_block {
859 // Switching from HTML block to code (shouldn't happen normally, but handle it)
860 blocks.push(Block::Html {
861 lines: current_html_block.clone(),
862 has_preceding_blank: html_block_has_preceding_blank,
863 });
864 current_html_block.clear();
865 html_tag_stack.clear();
866 in_html_block = false;
867 }
868 if !in_code {
869 // Switching from content to code
870 if !current_paragraph.is_empty() {
871 blocks.push(Block::Paragraph(current_paragraph.clone()));
872 current_paragraph.clear();
873 }
874 in_code = true;
875 // Record whether there was a blank line before this code block
876 code_block_has_preceding_blank = had_preceding_blank;
877 }
878 current_code_block.push((content.clone(), *indent));
879 had_preceding_blank = false; // Reset after code
880 }
881 LineType::NestedListItem(content, indent) => {
882 if in_code {
883 // Switching from code to nested list
884 blocks.push(Block::Code {
885 lines: current_code_block.clone(),
886 has_preceding_blank: code_block_has_preceding_blank,
887 });
888 current_code_block.clear();
889 in_code = false;
890 } else if in_html_block {
891 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
892 blocks.push(Block::Html {
893 lines: current_html_block.clone(),
894 has_preceding_blank: html_block_has_preceding_blank,
895 });
896 current_html_block.clear();
897 html_tag_stack.clear();
898 in_html_block = false;
899 }
900 if !in_nested_list {
901 // Switching from content to nested list
902 if !current_paragraph.is_empty() {
903 blocks.push(Block::Paragraph(current_paragraph.clone()));
904 current_paragraph.clear();
905 }
906 in_nested_list = true;
907 }
908 current_nested_list.push((content.clone(), *indent));
909 had_preceding_blank = false; // Reset after nested list
910 }
911 LineType::SemanticLine(content) => {
912 // Semantic lines are standalone - flush any current block and add as separate block
913 if in_code {
914 blocks.push(Block::Code {
915 lines: current_code_block.clone(),
916 has_preceding_blank: code_block_has_preceding_blank,
917 });
918 current_code_block.clear();
919 in_code = false;
920 } else if in_nested_list {
921 blocks.push(Block::NestedList(current_nested_list.clone()));
922 current_nested_list.clear();
923 in_nested_list = false;
924 } else if in_html_block {
925 blocks.push(Block::Html {
926 lines: current_html_block.clone(),
927 has_preceding_blank: html_block_has_preceding_blank,
928 });
929 current_html_block.clear();
930 html_tag_stack.clear();
931 in_html_block = false;
932 } else if !current_paragraph.is_empty() {
933 blocks.push(Block::Paragraph(current_paragraph.clone()));
934 current_paragraph.clear();
935 }
936 // Add semantic line as its own block
937 blocks.push(Block::SemanticLine(content.clone()));
938 had_preceding_blank = false; // Reset after semantic line
939 }
940 }
941 }
942
943 // Push remaining block
944 if in_code && !current_code_block.is_empty() {
945 blocks.push(Block::Code {
946 lines: current_code_block,
947 has_preceding_blank: code_block_has_preceding_blank,
948 });
949 } else if in_nested_list && !current_nested_list.is_empty() {
950 blocks.push(Block::NestedList(current_nested_list));
951 } else if in_html_block && !current_html_block.is_empty() {
952 // If we still have an unclosed HTML block, push it anyway
953 // (malformed HTML - missing closing tag)
954 blocks.push(Block::Html {
955 lines: current_html_block,
956 has_preceding_blank: html_block_has_preceding_blank,
957 });
958 } else if !current_paragraph.is_empty() {
959 blocks.push(Block::Paragraph(current_paragraph));
960 }
961
962 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
963 let content_lines: Vec<String> = list_item_lines
964 .iter()
965 .filter_map(|line| {
966 if let LineType::Content(s) = line {
967 Some(s.clone())
968 } else {
969 None
970 }
971 })
972 .collect();
973
974 // Check if we need to reflow this list item
975 // We check the combined content to see if it exceeds length limits
976 let combined_content = content_lines.join(" ").trim().to_string();
977 let full_line = format!("{marker}{combined_content}");
978
979 // Helper to check if we should reflow in normalize mode
980 let should_normalize = || {
981 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
982 // DO normalize if it has plain text content that spans multiple lines
983 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
984 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
985 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
986 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
987
988 // If we have nested lists, code blocks, or semantic lines but no paragraphs, don't normalize
989 if (has_nested_lists || has_code_blocks || has_semantic_lines) && !has_paragraphs {
990 return false;
991 }
992
993 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
994 if has_paragraphs {
995 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
996 if paragraph_count > 1 {
997 // Multiple paragraph blocks should be normalized
998 return true;
999 }
1000
1001 // Single paragraph block: normalize if it has multiple content lines
1002 if content_lines.len() > 1 {
1003 return true;
1004 }
1005 }
1006
1007 false
1008 };
1009
1010 let needs_reflow = match config.reflow_mode {
1011 ReflowMode::Normalize => {
1012 // Only reflow if:
1013 // 1. The combined line would exceed the limit, OR
1014 // 2. The list item should be normalized (has multi-line plain text)
1015 let combined_length = self.calculate_effective_length(&full_line);
1016 if combined_length > config.line_length.get() {
1017 true
1018 } else {
1019 should_normalize()
1020 }
1021 }
1022 ReflowMode::SentencePerLine => {
1023 // Check if list item has multiple sentences
1024 let sentences = split_into_sentences(&combined_content);
1025 sentences.len() > 1
1026 }
1027 ReflowMode::Default => {
1028 // In default mode, only reflow if any individual line exceeds limit
1029 // Check the original lines, not the combined content
1030 (list_start..i)
1031 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1032 }
1033 };
1034
1035 if needs_reflow {
1036 let start_range = line_index.whole_line_range(list_start + 1);
1037 let end_line = i - 1;
1038 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1039 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1040 } else {
1041 line_index.whole_line_range(end_line + 1)
1042 };
1043 let byte_range = start_range.start..end_range.end;
1044
1045 // Reflow each block (paragraphs only, preserve code blocks)
1046 // When line_length = 0 (no limit), use a very large value for reflow
1047 let reflow_line_length = if config.line_length.is_unlimited() {
1048 usize::MAX
1049 } else {
1050 config.line_length.get().saturating_sub(indent_size).max(1)
1051 };
1052 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1053 line_length: reflow_line_length,
1054 break_on_sentences: true,
1055 preserve_breaks: false,
1056 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1057 abbreviations: config.abbreviations.clone(),
1058 };
1059
1060 let mut result: Vec<String> = Vec::new();
1061 let mut is_first_block = true;
1062
1063 for (block_idx, block) in blocks.iter().enumerate() {
1064 match block {
1065 Block::Paragraph(para_lines) => {
1066 // Split the paragraph into segments at hard break boundaries
1067 // Each segment can be reflowed independently
1068 let segments = split_into_segments(para_lines);
1069
1070 for (segment_idx, segment) in segments.iter().enumerate() {
1071 // Check if this segment ends with a hard break and what type
1072 let hard_break_type = segment.last().and_then(|line| {
1073 let line = line.strip_suffix('\r').unwrap_or(line);
1074 if line.ends_with('\\') {
1075 Some("\\")
1076 } else if line.ends_with(" ") {
1077 Some(" ")
1078 } else {
1079 None
1080 }
1081 });
1082
1083 // Join and reflow the segment (removing the hard break marker for processing)
1084 let segment_for_reflow: Vec<String> = segment
1085 .iter()
1086 .map(|line| {
1087 // Strip hard break marker (2 spaces or backslash) for reflow processing
1088 if line.ends_with('\\') {
1089 line[..line.len() - 1].trim_end().to_string()
1090 } else if line.ends_with(" ") {
1091 line[..line.len() - 2].trim_end().to_string()
1092 } else {
1093 line.clone()
1094 }
1095 })
1096 .collect();
1097
1098 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1099 if !segment_text.is_empty() {
1100 let reflowed =
1101 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1102
1103 if is_first_block && segment_idx == 0 {
1104 // First segment of first block starts with marker
1105 result.push(format!("{marker}{}", reflowed[0]));
1106 for line in reflowed.iter().skip(1) {
1107 result.push(format!("{expected_indent}{line}"));
1108 }
1109 is_first_block = false;
1110 } else {
1111 // Subsequent segments
1112 for line in reflowed {
1113 result.push(format!("{expected_indent}{line}"));
1114 }
1115 }
1116
1117 // If this segment had a hard break, add it back to the last line
1118 // Preserve the original hard break format (backslash or two spaces)
1119 if let Some(break_marker) = hard_break_type
1120 && let Some(last_line) = result.last_mut()
1121 {
1122 last_line.push_str(break_marker);
1123 }
1124 }
1125 }
1126
1127 // Add blank line after paragraph block if there's a next block
1128 // BUT: check if next block is a code block that doesn't want a preceding blank
1129 if block_idx < blocks.len() - 1 {
1130 let next_block = &blocks[block_idx + 1];
1131 let should_add_blank = match next_block {
1132 Block::Code {
1133 has_preceding_blank, ..
1134 } => *has_preceding_blank,
1135 _ => true, // For all other blocks, add blank line
1136 };
1137 if should_add_blank {
1138 result.push(String::new());
1139 }
1140 }
1141 }
1142 Block::Code {
1143 lines: code_lines,
1144 has_preceding_blank: _,
1145 } => {
1146 // Preserve code blocks as-is with original indentation
1147 // NOTE: Blank line before code block is handled by the previous block
1148 // (see paragraph block's logic above)
1149
1150 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1151 if is_first_block && idx == 0 {
1152 // First line of first block gets marker
1153 result.push(format!(
1154 "{marker}{}",
1155 " ".repeat(orig_indent - marker_len) + content
1156 ));
1157 is_first_block = false;
1158 } else if content.is_empty() {
1159 result.push(String::new());
1160 } else {
1161 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1162 }
1163 }
1164 }
1165 Block::NestedList(nested_items) => {
1166 // Preserve nested list items as-is with original indentation
1167 if !is_first_block {
1168 result.push(String::new());
1169 }
1170
1171 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1172 if is_first_block && idx == 0 {
1173 // First line of first block gets marker
1174 result.push(format!(
1175 "{marker}{}",
1176 " ".repeat(orig_indent - marker_len) + content
1177 ));
1178 is_first_block = false;
1179 } else if content.is_empty() {
1180 result.push(String::new());
1181 } else {
1182 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1183 }
1184 }
1185
1186 // Add blank line after nested list if there's a next block
1187 // Check if next block is a code block that doesn't want a preceding blank
1188 if block_idx < blocks.len() - 1 {
1189 let next_block = &blocks[block_idx + 1];
1190 let should_add_blank = match next_block {
1191 Block::Code {
1192 has_preceding_blank, ..
1193 } => *has_preceding_blank,
1194 _ => true, // For all other blocks, add blank line
1195 };
1196 if should_add_blank {
1197 result.push(String::new());
1198 }
1199 }
1200 }
1201 Block::SemanticLine(content) => {
1202 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1203 // Add blank line before if not first block
1204 if !is_first_block {
1205 result.push(String::new());
1206 }
1207
1208 if is_first_block {
1209 // First block starts with marker
1210 result.push(format!("{marker}{content}"));
1211 is_first_block = false;
1212 } else {
1213 // Subsequent blocks use expected indent
1214 result.push(format!("{expected_indent}{content}"));
1215 }
1216
1217 // Add blank line after semantic line if there's a next block
1218 // Check if next block is a code block that doesn't want a preceding blank
1219 if block_idx < blocks.len() - 1 {
1220 let next_block = &blocks[block_idx + 1];
1221 let should_add_blank = match next_block {
1222 Block::Code {
1223 has_preceding_blank, ..
1224 } => *has_preceding_blank,
1225 _ => true, // For all other blocks, add blank line
1226 };
1227 if should_add_blank {
1228 result.push(String::new());
1229 }
1230 }
1231 }
1232 Block::Html {
1233 lines: html_lines,
1234 has_preceding_blank: _,
1235 } => {
1236 // Preserve HTML blocks exactly as-is with original indentation
1237 // NOTE: Blank line before HTML block is handled by the previous block
1238
1239 for (idx, line) in html_lines.iter().enumerate() {
1240 if is_first_block && idx == 0 {
1241 // First line of first block gets marker
1242 result.push(format!("{marker}{line}"));
1243 is_first_block = false;
1244 } else if line.is_empty() {
1245 // Preserve blank lines inside HTML blocks
1246 result.push(String::new());
1247 } else {
1248 // Preserve lines with their original content (already includes indentation)
1249 result.push(format!("{expected_indent}{line}"));
1250 }
1251 }
1252
1253 // Add blank line after HTML block if there's a next block
1254 if block_idx < blocks.len() - 1 {
1255 let next_block = &blocks[block_idx + 1];
1256 let should_add_blank = match next_block {
1257 Block::Code {
1258 has_preceding_blank, ..
1259 } => *has_preceding_blank,
1260 Block::Html {
1261 has_preceding_blank, ..
1262 } => *has_preceding_blank,
1263 _ => true, // For all other blocks, add blank line
1264 };
1265 if should_add_blank {
1266 result.push(String::new());
1267 }
1268 }
1269 }
1270 }
1271 }
1272
1273 let reflowed_text = result.join("\n");
1274
1275 // Preserve trailing newline
1276 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1277 format!("{reflowed_text}\n")
1278 } else {
1279 reflowed_text
1280 };
1281
1282 // Get the original text to compare
1283 let original_text = &ctx.content[byte_range.clone()];
1284
1285 // Only generate a warning if the replacement is different from the original
1286 if original_text != replacement {
1287 // Generate an appropriate message based on why reflow is needed
1288 let message = match config.reflow_mode {
1289 ReflowMode::SentencePerLine => {
1290 let num_sentences = split_into_sentences(&combined_content).len();
1291 let num_lines = content_lines.len();
1292 if num_lines == 1 {
1293 // Single line with multiple sentences
1294 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1295 } else {
1296 // Multiple lines - could be split sentences or mixed
1297 format!(
1298 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1299 )
1300 }
1301 }
1302 ReflowMode::Normalize => {
1303 let combined_length = self.calculate_effective_length(&full_line);
1304 if combined_length > config.line_length.get() {
1305 format!(
1306 "Line length {} exceeds {} characters",
1307 combined_length,
1308 config.line_length.get()
1309 )
1310 } else {
1311 "Multi-line content can be normalized".to_string()
1312 }
1313 }
1314 ReflowMode::Default => {
1315 let combined_length = self.calculate_effective_length(&full_line);
1316 format!(
1317 "Line length {} exceeds {} characters",
1318 combined_length,
1319 config.line_length.get()
1320 )
1321 }
1322 };
1323
1324 warnings.push(LintWarning {
1325 rule_name: Some(self.name().to_string()),
1326 message,
1327 line: list_start + 1,
1328 column: 1,
1329 end_line: end_line + 1,
1330 end_column: lines[end_line].len() + 1,
1331 severity: Severity::Warning,
1332 fix: Some(crate::rule::Fix {
1333 range: byte_range,
1334 replacement,
1335 }),
1336 });
1337 }
1338 }
1339 continue;
1340 }
1341
1342 // Found start of a paragraph - collect all lines in it
1343 let paragraph_start = i;
1344 let mut paragraph_lines = vec![lines[i]];
1345 i += 1;
1346
1347 while i < lines.len() {
1348 let next_line = lines[i];
1349 let next_line_num = i + 1;
1350 let next_trimmed = next_line.trim();
1351
1352 // Stop at paragraph boundaries
1353 if next_trimmed.is_empty()
1354 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1355 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1356 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1357 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1358 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1359 || (next_line_num > 0
1360 && next_line_num <= ctx.lines.len()
1361 && ctx.lines[next_line_num - 1].blockquote.is_some())
1362 || next_trimmed.starts_with('#')
1363 || TableUtils::is_potential_table_row(next_line)
1364 || is_list_item(next_trimmed)
1365 || is_horizontal_rule(next_trimmed)
1366 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1367 || is_template_directive_only(next_line)
1368 {
1369 break;
1370 }
1371
1372 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1373 if i > 0 && has_hard_break(lines[i - 1]) {
1374 // Don't include lines after hard breaks in the same paragraph
1375 break;
1376 }
1377
1378 paragraph_lines.push(next_line);
1379 i += 1;
1380 }
1381
1382 // Combine paragraph lines into a single string for processing
1383 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1384 let paragraph_text = paragraph_lines.join(" ");
1385
1386 // Skip reflowing if this paragraph contains definition list items
1387 // Definition lists are multi-line structures that should not be joined
1388 let contains_definition_list = paragraph_lines
1389 .iter()
1390 .any(|line| crate::utils::is_definition_list_item(line));
1391
1392 if contains_definition_list {
1393 // Don't reflow definition lists - skip this paragraph
1394 i = paragraph_start + paragraph_lines.len();
1395 continue;
1396 }
1397
1398 // Check if this paragraph needs reflowing
1399 let needs_reflow = match config.reflow_mode {
1400 ReflowMode::Normalize => {
1401 // In normalize mode, reflow multi-line paragraphs
1402 paragraph_lines.len() > 1
1403 }
1404 ReflowMode::SentencePerLine => {
1405 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1406 // Note: we check the joined text because sentences can span multiple lines
1407 let sentences = split_into_sentences(¶graph_text);
1408
1409 // Always reflow if multiple sentences on one line
1410 if sentences.len() > 1 {
1411 true
1412 } else if paragraph_lines.len() > 1 {
1413 // For single-sentence paragraphs spanning multiple lines:
1414 // Reflow if they COULD fit on one line (respecting line-length constraint)
1415 if config.line_length.is_unlimited() {
1416 // No line-length constraint - always join single sentences
1417 true
1418 } else {
1419 // Only join if it fits within line-length
1420 let effective_length = self.calculate_effective_length(¶graph_text);
1421 effective_length <= config.line_length.get()
1422 }
1423 } else {
1424 false
1425 }
1426 }
1427 ReflowMode::Default => {
1428 // In default mode, only reflow if lines exceed limit
1429 paragraph_lines
1430 .iter()
1431 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1432 }
1433 };
1434
1435 if needs_reflow {
1436 // Calculate byte range for this paragraph
1437 // Use whole_line_range for each line and combine
1438 let start_range = line_index.whole_line_range(paragraph_start + 1);
1439 let end_line = paragraph_start + paragraph_lines.len() - 1;
1440
1441 // For the last line, we want to preserve any trailing newline
1442 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1443 // Last line without trailing newline - use line_text_range
1444 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1445 } else {
1446 // Not the last line or has trailing newline - use whole_line_range
1447 line_index.whole_line_range(end_line + 1)
1448 };
1449
1450 let byte_range = start_range.start..end_range.end;
1451
1452 // Check if the paragraph ends with a hard break and what type
1453 let hard_break_type = paragraph_lines.last().and_then(|line| {
1454 let line = line.strip_suffix('\r').unwrap_or(line);
1455 if line.ends_with('\\') {
1456 Some("\\")
1457 } else if line.ends_with(" ") {
1458 Some(" ")
1459 } else {
1460 None
1461 }
1462 });
1463
1464 // Reflow the paragraph
1465 // When line_length = 0 (no limit), use a very large value for reflow
1466 let reflow_line_length = if config.line_length.is_unlimited() {
1467 usize::MAX
1468 } else {
1469 config.line_length.get()
1470 };
1471 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1472 line_length: reflow_line_length,
1473 break_on_sentences: true,
1474 preserve_breaks: false,
1475 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1476 abbreviations: config.abbreviations.clone(),
1477 };
1478 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1479
1480 // If the original paragraph ended with a hard break, preserve it
1481 // Preserve the original hard break format (backslash or two spaces)
1482 if let Some(break_marker) = hard_break_type
1483 && !reflowed.is_empty()
1484 {
1485 let last_idx = reflowed.len() - 1;
1486 if !has_hard_break(&reflowed[last_idx]) {
1487 reflowed[last_idx].push_str(break_marker);
1488 }
1489 }
1490
1491 let reflowed_text = reflowed.join("\n");
1492
1493 // Preserve trailing newline if the original paragraph had one
1494 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1495 format!("{reflowed_text}\n")
1496 } else {
1497 reflowed_text
1498 };
1499
1500 // Get the original text to compare
1501 let original_text = &ctx.content[byte_range.clone()];
1502
1503 // Only generate a warning if the replacement is different from the original
1504 if original_text != replacement {
1505 // Create warning with actual fix
1506 // In default mode, report the specific line that violates
1507 // In normalize mode, report the whole paragraph
1508 // In sentence-per-line mode, report the entire paragraph
1509 let (warning_line, warning_end_line) = match config.reflow_mode {
1510 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1511 ReflowMode::SentencePerLine => {
1512 // Highlight the entire paragraph that needs reformatting
1513 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1514 }
1515 ReflowMode::Default => {
1516 // Find the first line that exceeds the limit
1517 let mut violating_line = paragraph_start;
1518 for (idx, line) in paragraph_lines.iter().enumerate() {
1519 if self.calculate_effective_length(line) > config.line_length.get() {
1520 violating_line = paragraph_start + idx;
1521 break;
1522 }
1523 }
1524 (violating_line + 1, violating_line + 1)
1525 }
1526 };
1527
1528 warnings.push(LintWarning {
1529 rule_name: Some(self.name().to_string()),
1530 message: match config.reflow_mode {
1531 ReflowMode::Normalize => format!(
1532 "Paragraph could be normalized to use line length of {} characters",
1533 config.line_length.get()
1534 ),
1535 ReflowMode::SentencePerLine => {
1536 let num_sentences = split_into_sentences(¶graph_text).len();
1537 if paragraph_lines.len() == 1 {
1538 // Single line with multiple sentences
1539 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1540 } else {
1541 let num_lines = paragraph_lines.len();
1542 // Multiple lines - could be split sentences or mixed
1543 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1544 }
1545 },
1546 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1547 },
1548 line: warning_line,
1549 column: 1,
1550 end_line: warning_end_line,
1551 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1552 severity: Severity::Warning,
1553 fix: Some(crate::rule::Fix {
1554 range: byte_range,
1555 replacement,
1556 }),
1557 });
1558 }
1559 }
1560 }
1561
1562 warnings
1563 }
1564
1565 /// Calculate string length based on the configured length mode
1566 fn calculate_string_length(&self, s: &str) -> usize {
1567 match self.config.length_mode {
1568 LengthMode::Chars => s.chars().count(),
1569 LengthMode::Visual => s.width(),
1570 LengthMode::Bytes => s.len(),
1571 }
1572 }
1573
1574 /// Calculate effective line length excluding unbreakable URLs
1575 fn calculate_effective_length(&self, line: &str) -> usize {
1576 if self.config.strict {
1577 // In strict mode, count everything
1578 return self.calculate_string_length(line);
1579 }
1580
1581 // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1582 let bytes = line.as_bytes();
1583 if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1584 return self.calculate_string_length(line);
1585 }
1586
1587 // More precise check for URLs and links
1588 if !line.contains("http") && !line.contains('[') {
1589 return self.calculate_string_length(line);
1590 }
1591
1592 let mut effective_line = line.to_string();
1593
1594 // First handle markdown links to avoid double-counting URLs
1595 // Pattern: [text](very-long-url) -> [text](url)
1596 if line.contains('[') && line.contains("](") {
1597 for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1598 if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1599 && url.as_str().len() > 15
1600 {
1601 let replacement = format!("[{}](url)", text.as_str());
1602 effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1603 }
1604 }
1605 }
1606
1607 // Then replace bare URLs with a placeholder of reasonable length
1608 // This allows lines with long URLs to pass if the rest of the content is reasonable
1609 if effective_line.contains("http") {
1610 for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1611 let url = url_match.as_str();
1612 // Skip if this URL is already part of a markdown link we handled
1613 if !effective_line.contains(&format!("({url})")) {
1614 // Replace URL with placeholder that represents a "reasonable" URL length
1615 // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1616 let placeholder = "x".repeat(15.min(url.len()));
1617 effective_line = effective_line.replacen(url, &placeholder, 1);
1618 }
1619 }
1620 }
1621
1622 self.calculate_string_length(&effective_line)
1623 }
1624}