rumdl_lib/rules/md013_line_length/mod.rs
//! Rule MD013: Line length
//!
//! See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::range_utils::LineIndex;
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9 IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use crate::utils::table_utils::TableUtils;
12use crate::utils::text_reflow::split_into_sentences;
13use toml;
14
15mod helpers;
16pub mod md013_config;
17use helpers::{
18 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
19 split_into_segments, trim_preserving_hard_break,
20};
21pub use md013_config::MD013Config;
22use md013_config::{LengthMode, ReflowMode};
23
24#[cfg(test)]
25mod tests;
26use unicode_width::UnicodeWidthStr;
27
28#[derive(Clone, Default)]
29pub struct MD013LineLength {
30 pub(crate) config: MD013Config,
31}
32
33impl MD013LineLength {
34 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
35 Self {
36 config: MD013Config {
37 line_length: crate::types::LineLength::new(line_length),
38 code_blocks,
39 tables,
40 headings,
41 paragraphs: true, // Default to true for backwards compatibility
42 strict,
43 reflow: false,
44 reflow_mode: ReflowMode::default(),
45 length_mode: LengthMode::default(),
46 abbreviations: None,
47 },
48 }
49 }
50
51 pub fn from_config_struct(config: MD013Config) -> Self {
52 Self { config }
53 }
54
55 fn should_ignore_line(
56 &self,
57 line: &str,
58 _lines: &[&str],
59 current_line: usize,
60 ctx: &crate::lint_context::LintContext,
61 ) -> bool {
62 if self.config.strict {
63 return false;
64 }
65
66 // Quick check for common patterns before expensive regex
67 let trimmed = line.trim();
68
69 // Only skip if the entire line is a URL (quick check first)
70 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
71 return true;
72 }
73
74 // Only skip if the entire line is an image reference (quick check first)
75 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
76 return true;
77 }
78
79 // Only skip if the entire line is a link reference (quick check first)
80 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
81 return true;
82 }
83
84 // Code blocks with long strings (only check if in code block)
85 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
86 && !trimmed.is_empty()
87 && !line.contains(' ')
88 && !line.contains('\t')
89 {
90 return true;
91 }
92
93 false
94 }
95
96 /// Check if rule should skip based on provided config (used for inline config support)
97 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
98 // Skip if content is empty
99 if ctx.content.is_empty() {
100 return true;
101 }
102
103 // For sentence-per-line or normalize mode, never skip based on line length
104 if config.reflow
105 && (config.reflow_mode == ReflowMode::SentencePerLine || config.reflow_mode == ReflowMode::Normalize)
106 {
107 return false;
108 }
109
110 // Quick check: if total content is shorter than line limit, definitely skip
111 if ctx.content.len() <= config.line_length.get() {
112 return true;
113 }
114
115 // Skip if no line exceeds the limit
116 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
117 }
118}
119
120impl Rule for MD013LineLength {
121 fn name(&self) -> &'static str {
122 "MD013"
123 }
124
125 fn description(&self) -> &'static str {
126 "Line length should not be excessive"
127 }
128
129 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
130 let content = ctx.content;
131
132 // Parse inline configuration FIRST so we can use effective config for should_skip
133 let inline_config = crate::inline_config::InlineConfig::from_content(content);
134 let config_override = inline_config.get_rule_config("MD013");
135
136 // Apply configuration override if present
137 let effective_config = if let Some(json_config) = config_override {
138 if let Some(obj) = json_config.as_object() {
139 let mut config = self.config.clone();
140 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
141 config.line_length = crate::types::LineLength::new(line_length as usize);
142 }
143 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
144 config.code_blocks = code_blocks;
145 }
146 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
147 config.tables = tables;
148 }
149 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
150 config.headings = headings;
151 }
152 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
153 config.strict = strict;
154 }
155 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
156 config.reflow = reflow;
157 }
158 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
159 config.reflow_mode = match reflow_mode {
160 "default" => ReflowMode::Default,
161 "normalize" => ReflowMode::Normalize,
162 "sentence-per-line" => ReflowMode::SentencePerLine,
163 _ => ReflowMode::default(),
164 };
165 }
166 config
167 } else {
168 self.config.clone()
169 }
170 } else {
171 self.config.clone()
172 };
173
174 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
175 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
176 if self.should_skip_with_config(ctx, &effective_config)
177 && !(effective_config.reflow
178 && (effective_config.reflow_mode == ReflowMode::Normalize
179 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
180 {
181 return Ok(Vec::new());
182 }
183
184 // Direct implementation without DocumentStructure
185 let mut warnings = Vec::new();
186
187 // Special handling: line_length = 0 means "no line length limit"
188 // Skip all line length checks, but still allow reflow if enabled
189 let skip_length_checks = effective_config.line_length.is_unlimited();
190
191 // Pre-filter lines that could be problematic to avoid processing all lines
192 let mut candidate_lines = Vec::new();
193 if !skip_length_checks {
194 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
195 // Skip front matter - it should never be linted
196 if line_info.in_front_matter {
197 continue;
198 }
199
200 // Quick length check first
201 if line_info.byte_len > effective_config.line_length.get() {
202 candidate_lines.push(line_idx);
203 }
204 }
205 }
206
207 // If no candidate lines and not in normalize or sentence-per-line mode, early return
208 if candidate_lines.is_empty()
209 && !(effective_config.reflow
210 && (effective_config.reflow_mode == ReflowMode::Normalize
211 || effective_config.reflow_mode == ReflowMode::SentencePerLine))
212 {
213 return Ok(warnings);
214 }
215
216 // Use ctx.lines if available for better performance
217 let lines: Vec<&str> = if !ctx.lines.is_empty() {
218 ctx.lines.iter().map(|l| l.content(ctx.content)).collect()
219 } else {
220 content.lines().collect()
221 };
222
223 // Create a quick lookup set for heading lines
224 // We need this for both the heading skip check AND the paragraphs check
225 let heading_lines_set: std::collections::HashSet<usize> = ctx
226 .lines
227 .iter()
228 .enumerate()
229 .filter(|(_, line)| line.heading.is_some())
230 .map(|(idx, _)| idx + 1)
231 .collect();
232
233 // Use pre-computed table blocks from context
234 // We need this for both the table skip check AND the paragraphs check
235 let table_blocks = &ctx.table_blocks;
236 let mut table_lines_set = std::collections::HashSet::new();
237 for table in table_blocks {
238 table_lines_set.insert(table.header_line + 1);
239 table_lines_set.insert(table.delimiter_line + 1);
240 for &line in &table.content_lines {
241 table_lines_set.insert(line + 1);
242 }
243 }
244
245 // Process candidate lines for line length checks
246 for &line_idx in &candidate_lines {
247 let line_number = line_idx + 1;
248 let line = lines[line_idx];
249
250 // Calculate effective length excluding unbreakable URLs
251 let effective_length = self.calculate_effective_length(line);
252
253 // Use single line length limit for all content
254 let line_limit = effective_config.line_length.get();
255
256 // Skip short lines immediately (double-check after effective length calculation)
257 if effective_length <= line_limit {
258 continue;
259 }
260
261 // Skip mkdocstrings blocks (already handled by LintContext)
262 if ctx.lines[line_idx].in_mkdocstrings {
263 continue;
264 }
265
266 // Skip various block types efficiently
267 if !effective_config.strict {
268 // Skip setext heading underlines
269 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
270 continue;
271 }
272
273 // Skip block elements according to config flags
274 // The flags mean: true = check these elements, false = skip these elements
275 // So we skip when the flag is FALSE and the line is in that element type
276 if (!effective_config.headings && heading_lines_set.contains(&line_number))
277 || (!effective_config.code_blocks
278 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
279 || (!effective_config.tables && table_lines_set.contains(&line_number))
280 || ctx.lines[line_number - 1].blockquote.is_some()
281 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
282 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
283 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
284 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
285 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
286 {
287 continue;
288 }
289
290 // Check if this is a paragraph/regular text line
291 // If paragraphs = false, skip lines that are NOT in special blocks
292 if !effective_config.paragraphs {
293 let is_special_block = heading_lines_set.contains(&line_number)
294 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
295 || table_lines_set.contains(&line_number)
296 || ctx.lines[line_number - 1].blockquote.is_some()
297 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
298 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
299 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
300 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
301 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment);
302
303 // Skip regular paragraph text when paragraphs = false
304 if !is_special_block {
305 continue;
306 }
307 }
308
309 // Skip lines that are only a URL, image ref, or link ref
310 if self.should_ignore_line(line, &lines, line_idx, ctx) {
311 continue;
312 }
313 }
314
315 // In sentence-per-line mode, check if this is a single long sentence
316 // If so, emit a warning without a fix (user must manually rephrase)
317 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
318 let sentences = split_into_sentences(line.trim());
319 if sentences.len() == 1 {
320 // Single sentence that's too long - warn but don't auto-fix
321 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
322
323 let (start_line, start_col, end_line, end_col) =
324 calculate_excess_range(line_number, line, line_limit);
325
326 warnings.push(LintWarning {
327 rule_name: Some(self.name().to_string()),
328 message,
329 line: start_line,
330 column: start_col,
331 end_line,
332 end_column: end_col,
333 severity: Severity::Warning,
334 fix: None, // No auto-fix for long single sentences
335 });
336 continue;
337 }
338 // Multiple sentences will be handled by paragraph-based reflow
339 continue;
340 }
341
342 // Don't provide fix for individual lines when reflow is enabled
343 // Paragraph-based fixes will be handled separately
344 let fix = None;
345
346 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
347
348 // Calculate precise character range for the excess portion
349 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
350
351 warnings.push(LintWarning {
352 rule_name: Some(self.name().to_string()),
353 message,
354 line: start_line,
355 column: start_col,
356 end_line,
357 end_column: end_col,
358 severity: Severity::Warning,
359 fix,
360 });
361 }
362
363 // If reflow is enabled, generate paragraph-based fixes
364 if effective_config.reflow {
365 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, &lines);
366 // Merge paragraph warnings with line warnings, removing duplicates
367 for pw in paragraph_warnings {
368 // Remove any line warnings that overlap with this paragraph
369 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
370 warnings.push(pw);
371 }
372 }
373
374 Ok(warnings)
375 }
376
377 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
378 // For CLI usage, apply fixes from warnings
379 // LSP will use the warning-based fixes directly
380 let warnings = self.check(ctx)?;
381
382 // If there are no fixes, return content unchanged
383 if !warnings.iter().any(|w| w.fix.is_some()) {
384 return Ok(ctx.content.to_string());
385 }
386
387 // Apply warning-based fixes
388 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
389 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
390 }
391
392 fn as_any(&self) -> &dyn std::any::Any {
393 self
394 }
395
396 fn category(&self) -> RuleCategory {
397 RuleCategory::Whitespace
398 }
399
400 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
401 self.should_skip_with_config(ctx, &self.config)
402 }
403
404 fn default_config_section(&self) -> Option<(String, toml::Value)> {
405 let default_config = MD013Config::default();
406 let json_value = serde_json::to_value(&default_config).ok()?;
407 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
408
409 if let toml::Value::Table(table) = toml_value {
410 if !table.is_empty() {
411 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
412 } else {
413 None
414 }
415 } else {
416 None
417 }
418 }
419
420 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
421 let mut aliases = std::collections::HashMap::new();
422 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
423 Some(aliases)
424 }
425
426 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
427 where
428 Self: Sized,
429 {
430 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
431 // Use global line_length if rule-specific config still has default value
432 if rule_config.line_length.get() == 80 {
433 rule_config.line_length = config.global.line_length;
434 }
435 Box::new(Self::from_config_struct(rule_config))
436 }
437}
438
439impl MD013LineLength {
440 /// Generate paragraph-based fixes
441 fn generate_paragraph_fixes(
442 &self,
443 ctx: &crate::lint_context::LintContext,
444 config: &MD013Config,
445 lines: &[&str],
446 ) -> Vec<LintWarning> {
447 let mut warnings = Vec::new();
448 let line_index = LineIndex::new(ctx.content);
449
450 let mut i = 0;
451 while i < lines.len() {
452 let line_num = i + 1;
453
454 // Skip special structures
455 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
456 info.in_code_block
457 || info.in_front_matter
458 || info.in_html_block
459 || info.in_html_comment
460 || info.in_esm_block
461 || info.in_jsx_expression
462 || info.in_mdx_comment
463 });
464
465 if should_skip_due_to_line_info
466 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
467 || lines[i].trim().starts_with('#')
468 || TableUtils::is_potential_table_row(lines[i])
469 || lines[i].trim().is_empty()
470 || is_horizontal_rule(lines[i].trim())
471 || is_template_directive_only(lines[i])
472 {
473 i += 1;
474 continue;
475 }
476
477 // Helper function to detect semantic line markers
478 let is_semantic_line = |content: &str| -> bool {
479 let trimmed = content.trim_start();
480 let semantic_markers = [
481 "NOTE:",
482 "WARNING:",
483 "IMPORTANT:",
484 "CAUTION:",
485 "TIP:",
486 "DANGER:",
487 "HINT:",
488 "INFO:",
489 ];
490 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
491 };
492
493 // Helper function to detect fence markers (opening or closing)
494 let is_fence_marker = |content: &str| -> bool {
495 let trimmed = content.trim_start();
496 trimmed.starts_with("```") || trimmed.starts_with("~~~")
497 };
498
499 // Check if this is a list item - handle it specially
500 let trimmed = lines[i].trim();
501 if is_list_item(trimmed) {
502 // Collect the entire list item including continuation lines
503 let list_start = i;
504 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
505 let marker_len = marker.len();
506
507 // Track lines and their types (content, code block, fence, nested list)
508 #[derive(Clone)]
509 enum LineType {
510 Content(String),
511 CodeBlock(String, usize), // content and original indent
512 NestedListItem(String, usize), // full line content and original indent
513 SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
514 Empty,
515 }
516
517 let mut actual_indent: Option<usize> = None;
518 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
519 i += 1;
520
521 // Collect continuation lines using ctx.lines for metadata
522 while i < lines.len() {
523 let line_info = &ctx.lines[i];
524
525 // Use pre-computed is_blank from ctx
526 if line_info.is_blank {
527 // Empty line - check if next line is indented (part of list item)
528 if i + 1 < lines.len() {
529 let next_info = &ctx.lines[i + 1];
530
531 // Check if next line is indented enough to be continuation
532 if !next_info.is_blank && next_info.indent >= marker_len {
533 // This blank line is between paragraphs/blocks in the list item
534 list_item_lines.push(LineType::Empty);
535 i += 1;
536 continue;
537 }
538 }
539 // No indented line after blank, end of list item
540 break;
541 }
542
543 // Use pre-computed indent from ctx
544 let indent = line_info.indent;
545
546 // Valid continuation must be indented at least marker_len
547 if indent >= marker_len {
548 let trimmed = line_info.content(ctx.content).trim();
549
550 // Use pre-computed in_code_block from ctx
551 if line_info.in_code_block {
552 list_item_lines.push(LineType::CodeBlock(
553 line_info.content(ctx.content)[indent..].to_string(),
554 indent,
555 ));
556 i += 1;
557 continue;
558 }
559
560 // Check if this is a SIBLING list item (breaks parent)
561 // Nested lists are indented >= marker_len and are PART of the parent item
562 // Siblings are at indent < marker_len (at or before parent marker)
563 if is_list_item(trimmed) && indent < marker_len {
564 // This is a sibling item at same or higher level - end parent item
565 break;
566 }
567
568 // Check if this is a NESTED list item marker
569 // Nested lists should be processed separately UNLESS they're part of a
570 // multi-paragraph list item (indicated by a blank line before them OR
571 // it's a continuation of an already-started nested list)
572 if is_list_item(trimmed) && indent >= marker_len {
573 // Check if there was a blank line before this (multi-paragraph context)
574 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
575
576 // Check if we've already seen nested list content (another nested item)
577 let has_nested_content = list_item_lines.iter().any(|line| {
578 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
579 || matches!(line, LineType::NestedListItem(_, _))
580 });
581
582 if !has_blank_before && !has_nested_content {
583 // Single-paragraph context with no prior nested items: starts a new item
584 // End parent collection; nested list will be processed next
585 break;
586 }
587 // else: multi-paragraph context or continuation of nested list, keep collecting
588 // Mark this as a nested list item to preserve its structure
589 list_item_lines.push(LineType::NestedListItem(
590 line_info.content(ctx.content)[indent..].to_string(),
591 indent,
592 ));
593 i += 1;
594 continue;
595 }
596
597 // Normal continuation: marker_len to marker_len+3
598 if indent <= marker_len + 3 {
599 // Set actual_indent from first non-code continuation if not set
600 if actual_indent.is_none() {
601 actual_indent = Some(indent);
602 }
603
604 // Extract content (remove indentation and trailing whitespace)
605 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
606 // See: https://github.com/rvben/rumdl/issues/76
607 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
608
609 // Check if this is a fence marker (opening or closing)
610 // These should be treated as code block lines, not paragraph content
611 if is_fence_marker(&content) {
612 list_item_lines.push(LineType::CodeBlock(content, indent));
613 }
614 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
615 else if is_semantic_line(&content) {
616 list_item_lines.push(LineType::SemanticLine(content));
617 } else {
618 list_item_lines.push(LineType::Content(content));
619 }
620 i += 1;
621 } else {
622 // indent >= marker_len + 4: indented code block
623 list_item_lines.push(LineType::CodeBlock(
624 line_info.content(ctx.content)[indent..].to_string(),
625 indent,
626 ));
627 i += 1;
628 }
629 } else {
630 // Not indented enough, end of list item
631 break;
632 }
633 }
634
635 // Use detected indent or fallback to marker length
636 let indent_size = actual_indent.unwrap_or(marker_len);
637 let expected_indent = " ".repeat(indent_size);
638
639 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
640 #[derive(Clone)]
641 enum Block {
642 Paragraph(Vec<String>),
643 Code {
644 lines: Vec<(String, usize)>, // (content, indent) pairs
645 has_preceding_blank: bool, // Whether there was a blank line before this block
646 },
647 NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
648 SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
649 Html {
650 lines: Vec<String>, // HTML content preserved exactly as-is
651 has_preceding_blank: bool, // Whether there was a blank line before this block
652 },
653 }
654
// HTML tag detection helpers

/// Lower-case names of the HTML elements whose opening tag starts an HTML block.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];

/// Detects whether `line` begins an HTML block.
///
/// Returns `Some("!--")` when the trimmed line starts an HTML comment, and
/// `Some(tag)` with the lower-cased tag name when it starts with the opening tag of
/// one of [`BLOCK_LEVEL_TAGS`]. Closing tags (`</…`), declarations (`<!…`), unknown
/// tags and non-HTML lines yield `None`.
///
/// The tag name ends at the first whitespace, `>` or `/`, or at the end of the line,
/// so an opening tag whose attributes continue on the next line is still recognized.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let trimmed = line.trim();

    // Comments are tracked with the pseudo tag name "!--".
    if trimmed.starts_with("<!--") {
        return Some("!--".to_string());
    }

    let after_bracket = trimmed.strip_prefix('<')?;
    if after_bracket.starts_with('/') || after_bracket.starts_with('!') {
        return None;
    }

    // A name that runs to the end of the line (e.g. a bare `<div`) is still a tag name.
    let end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let tag_name = after_bracket[..end].to_lowercase();

    if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
        Some(tag_name)
    } else {
        None
    }
}
724
/// Reports whether `line` closes the HTML construct identified by `tag_name`.
///
/// For the comment pseudo tag `"!--"` the trimmed line must end with `-->`.
/// For element names the trimmed line must start with `</`, optionally followed by
/// whitespace, then the tag name (compared ASCII case-insensitively), and the name
/// must be followed by `>`, whitespace, or the end of the line. Requiring that
/// boundary keeps `</pre>` from closing `p` and `</thead>` from closing `th`.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let Some(after_slash) = trimmed.strip_prefix("</") else {
        return false;
    };
    let candidate = after_slash.trim_start();

    // `get` returns None when the line is shorter than the name or the cut would fall
    // inside a multi-byte character, so this never panics on non-ASCII input.
    let Some(name) = candidate.get(..tag_name.len()) else {
        return false;
    };
    if !name.eq_ignore_ascii_case(tag_name) {
        return false;
    }

    candidate[tag_name.len()..]
        .chars()
        .next()
        .map_or(true, |c| c == '>' || c.is_whitespace())
}
738
/// Reports whether the HTML construct opened on `line` is already complete on that
/// same line, so that no closing tag should be awaited.
///
/// This is the case when the trimmed line
/// - ends with `/>` (explicit self-closing syntax),
/// - is a whole comment (`<!-- … -->`), or
/// - opens a void element, which by definition has no closing tag (`<hr>`).
fn is_self_closing_tag(line: &str) -> bool {
    // Void elements among the block-level tags this module recognizes.
    const VOID_BLOCK_TAGS: &[&str] = &["hr"];
    // "<!--" plus "-->" without overlapping characters.
    const MIN_COMMENT_LEN: usize = 7;

    let trimmed = line.trim();
    if trimmed.ends_with("/>") {
        return true;
    }

    if trimmed.starts_with("<!--") {
        return trimmed.len() >= MIN_COMMENT_LEN && trimmed.ends_with("-->");
    }

    let Some(after_bracket) = trimmed.strip_prefix('<') else {
        return false;
    };
    let end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let tag_name = &after_bracket[..end];

    VOID_BLOCK_TAGS.iter().any(|void| tag_name.eq_ignore_ascii_case(void))
}
743
744 let mut blocks: Vec<Block> = Vec::new();
745 let mut current_paragraph: Vec<String> = Vec::new();
746 let mut current_code_block: Vec<(String, usize)> = Vec::new();
747 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
748 let mut current_html_block: Vec<String> = Vec::new();
749 let mut html_tag_stack: Vec<String> = Vec::new();
750 let mut in_code = false;
751 let mut in_nested_list = false;
752 let mut in_html_block = false;
753 let mut had_preceding_blank = false; // Track if we just saw an empty line
754 let mut code_block_has_preceding_blank = false; // Track blank before current code block
755 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
756
757 for line in &list_item_lines {
758 match line {
759 LineType::Empty => {
760 if in_code {
761 current_code_block.push((String::new(), 0));
762 } else if in_nested_list {
763 current_nested_list.push((String::new(), 0));
764 } else if in_html_block {
765 // Allow blank lines inside HTML blocks
766 current_html_block.push(String::new());
767 } else if !current_paragraph.is_empty() {
768 blocks.push(Block::Paragraph(current_paragraph.clone()));
769 current_paragraph.clear();
770 }
771 // Mark that we saw a blank line
772 had_preceding_blank = true;
773 }
774 LineType::Content(content) => {
775 // Check if we're currently in an HTML block
776 if in_html_block {
777 current_html_block.push(content.clone());
778
779 // Check if this line closes any open HTML tags
780 if let Some(last_tag) = html_tag_stack.last() {
781 if is_html_closing_tag(content, last_tag) {
782 html_tag_stack.pop();
783
784 // If stack is empty, HTML block is complete
785 if html_tag_stack.is_empty() {
786 blocks.push(Block::Html {
787 lines: current_html_block.clone(),
788 has_preceding_blank: html_block_has_preceding_blank,
789 });
790 current_html_block.clear();
791 in_html_block = false;
792 }
793 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
794 // Nested opening tag within HTML block
795 if !is_self_closing_tag(content) {
796 html_tag_stack.push(new_tag);
797 }
798 }
799 }
800 had_preceding_blank = false;
801 } else {
802 // Not in HTML block - check if this line starts one
803 if let Some(tag_name) = is_block_html_opening_tag(content) {
804 // Flush current paragraph before starting HTML block
805 if in_code {
806 blocks.push(Block::Code {
807 lines: current_code_block.clone(),
808 has_preceding_blank: code_block_has_preceding_blank,
809 });
810 current_code_block.clear();
811 in_code = false;
812 } else if in_nested_list {
813 blocks.push(Block::NestedList(current_nested_list.clone()));
814 current_nested_list.clear();
815 in_nested_list = false;
816 } else if !current_paragraph.is_empty() {
817 blocks.push(Block::Paragraph(current_paragraph.clone()));
818 current_paragraph.clear();
819 }
820
821 // Start new HTML block
822 in_html_block = true;
823 html_block_has_preceding_blank = had_preceding_blank;
824 current_html_block.push(content.clone());
825
826 // Check if it's self-closing or needs a closing tag
827 if is_self_closing_tag(content) {
828 // Self-closing tag - complete the HTML block immediately
829 blocks.push(Block::Html {
830 lines: current_html_block.clone(),
831 has_preceding_blank: html_block_has_preceding_blank,
832 });
833 current_html_block.clear();
834 in_html_block = false;
835 } else {
836 // Regular opening tag - push to stack
837 html_tag_stack.push(tag_name);
838 }
839 } else {
840 // Regular content line - add to paragraph
841 if in_code {
842 // Switching from code to content
843 blocks.push(Block::Code {
844 lines: current_code_block.clone(),
845 has_preceding_blank: code_block_has_preceding_blank,
846 });
847 current_code_block.clear();
848 in_code = false;
849 } else if in_nested_list {
850 // Switching from nested list to content
851 blocks.push(Block::NestedList(current_nested_list.clone()));
852 current_nested_list.clear();
853 in_nested_list = false;
854 }
855 current_paragraph.push(content.clone());
856 }
857 had_preceding_blank = false; // Reset after content
858 }
859 }
860 LineType::CodeBlock(content, indent) => {
861 if in_nested_list {
862 // Switching from nested list to code
863 blocks.push(Block::NestedList(current_nested_list.clone()));
864 current_nested_list.clear();
865 in_nested_list = false;
866 } else if in_html_block {
867 // Switching from HTML block to code (shouldn't happen normally, but handle it)
868 blocks.push(Block::Html {
869 lines: current_html_block.clone(),
870 has_preceding_blank: html_block_has_preceding_blank,
871 });
872 current_html_block.clear();
873 html_tag_stack.clear();
874 in_html_block = false;
875 }
876 if !in_code {
877 // Switching from content to code
878 if !current_paragraph.is_empty() {
879 blocks.push(Block::Paragraph(current_paragraph.clone()));
880 current_paragraph.clear();
881 }
882 in_code = true;
883 // Record whether there was a blank line before this code block
884 code_block_has_preceding_blank = had_preceding_blank;
885 }
886 current_code_block.push((content.clone(), *indent));
887 had_preceding_blank = false; // Reset after code
888 }
889 LineType::NestedListItem(content, indent) => {
890 if in_code {
891 // Switching from code to nested list
892 blocks.push(Block::Code {
893 lines: current_code_block.clone(),
894 has_preceding_blank: code_block_has_preceding_blank,
895 });
896 current_code_block.clear();
897 in_code = false;
898 } else if in_html_block {
899 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
900 blocks.push(Block::Html {
901 lines: current_html_block.clone(),
902 has_preceding_blank: html_block_has_preceding_blank,
903 });
904 current_html_block.clear();
905 html_tag_stack.clear();
906 in_html_block = false;
907 }
908 if !in_nested_list {
909 // Switching from content to nested list
910 if !current_paragraph.is_empty() {
911 blocks.push(Block::Paragraph(current_paragraph.clone()));
912 current_paragraph.clear();
913 }
914 in_nested_list = true;
915 }
916 current_nested_list.push((content.clone(), *indent));
917 had_preceding_blank = false; // Reset after nested list
918 }
919 LineType::SemanticLine(content) => {
920 // Semantic lines are standalone - flush any current block and add as separate block
921 if in_code {
922 blocks.push(Block::Code {
923 lines: current_code_block.clone(),
924 has_preceding_blank: code_block_has_preceding_blank,
925 });
926 current_code_block.clear();
927 in_code = false;
928 } else if in_nested_list {
929 blocks.push(Block::NestedList(current_nested_list.clone()));
930 current_nested_list.clear();
931 in_nested_list = false;
932 } else if in_html_block {
933 blocks.push(Block::Html {
934 lines: current_html_block.clone(),
935 has_preceding_blank: html_block_has_preceding_blank,
936 });
937 current_html_block.clear();
938 html_tag_stack.clear();
939 in_html_block = false;
940 } else if !current_paragraph.is_empty() {
941 blocks.push(Block::Paragraph(current_paragraph.clone()));
942 current_paragraph.clear();
943 }
944 // Add semantic line as its own block
945 blocks.push(Block::SemanticLine(content.clone()));
946 had_preceding_blank = false; // Reset after semantic line
947 }
948 }
949 }
950
951 // Push remaining block
952 if in_code && !current_code_block.is_empty() {
953 blocks.push(Block::Code {
954 lines: current_code_block,
955 has_preceding_blank: code_block_has_preceding_blank,
956 });
957 } else if in_nested_list && !current_nested_list.is_empty() {
958 blocks.push(Block::NestedList(current_nested_list));
959 } else if in_html_block && !current_html_block.is_empty() {
960 // If we still have an unclosed HTML block, push it anyway
961 // (malformed HTML - missing closing tag)
962 blocks.push(Block::Html {
963 lines: current_html_block,
964 has_preceding_blank: html_block_has_preceding_blank,
965 });
966 } else if !current_paragraph.is_empty() {
967 blocks.push(Block::Paragraph(current_paragraph));
968 }
969
970 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
971 let content_lines: Vec<String> = list_item_lines
972 .iter()
973 .filter_map(|line| {
974 if let LineType::Content(s) = line {
975 Some(s.clone())
976 } else {
977 None
978 }
979 })
980 .collect();
981
982 // Check if we need to reflow this list item
983 // We check the combined content to see if it exceeds length limits
984 let combined_content = content_lines.join(" ").trim().to_string();
985 let full_line = format!("{marker}{combined_content}");
986
987 // Helper to check if we should reflow in normalize mode
988 let should_normalize = || {
989 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
990 // DO normalize if it has plain text content that spans multiple lines
991 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
992 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
993 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
994 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
995
996 // If we have nested lists, code blocks, or semantic lines but no paragraphs, don't normalize
997 if (has_nested_lists || has_code_blocks || has_semantic_lines) && !has_paragraphs {
998 return false;
999 }
1000
1001 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1002 if has_paragraphs {
1003 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1004 if paragraph_count > 1 {
1005 // Multiple paragraph blocks should be normalized
1006 return true;
1007 }
1008
1009 // Single paragraph block: normalize if it has multiple content lines
1010 if content_lines.len() > 1 {
1011 return true;
1012 }
1013 }
1014
1015 false
1016 };
1017
1018 let needs_reflow = match config.reflow_mode {
1019 ReflowMode::Normalize => {
1020 // Only reflow if:
1021 // 1. The combined line would exceed the limit, OR
1022 // 2. The list item should be normalized (has multi-line plain text)
1023 let combined_length = self.calculate_effective_length(&full_line);
1024 if combined_length > config.line_length.get() {
1025 true
1026 } else {
1027 should_normalize()
1028 }
1029 }
1030 ReflowMode::SentencePerLine => {
1031 // Check if list item has multiple sentences
1032 let sentences = split_into_sentences(&combined_content);
1033 sentences.len() > 1
1034 }
1035 ReflowMode::Default => {
1036 // In default mode, only reflow if any individual line exceeds limit
1037 // Check the original lines, not the combined content
1038 (list_start..i)
1039 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1040 }
1041 };
1042
1043 if needs_reflow {
1044 let start_range = line_index.whole_line_range(list_start + 1);
1045 let end_line = i - 1;
1046 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1047 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1048 } else {
1049 line_index.whole_line_range(end_line + 1)
1050 };
1051 let byte_range = start_range.start..end_range.end;
1052
1053 // Reflow each block (paragraphs only, preserve code blocks)
1054 // When line_length = 0 (no limit), use a very large value for reflow
1055 let reflow_line_length = if config.line_length.is_unlimited() {
1056 usize::MAX
1057 } else {
1058 config.line_length.get().saturating_sub(indent_size).max(1)
1059 };
1060 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1061 line_length: reflow_line_length,
1062 break_on_sentences: true,
1063 preserve_breaks: false,
1064 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1065 abbreviations: config.abbreviations.clone(),
1066 };
1067
1068 let mut result: Vec<String> = Vec::new();
1069 let mut is_first_block = true;
1070
1071 for (block_idx, block) in blocks.iter().enumerate() {
1072 match block {
1073 Block::Paragraph(para_lines) => {
1074 // Split the paragraph into segments at hard break boundaries
1075 // Each segment can be reflowed independently
1076 let segments = split_into_segments(para_lines);
1077
1078 for (segment_idx, segment) in segments.iter().enumerate() {
1079 // Check if this segment ends with a hard break and what type
1080 let hard_break_type = segment.last().and_then(|line| {
1081 let line = line.strip_suffix('\r').unwrap_or(line);
1082 if line.ends_with('\\') {
1083 Some("\\")
1084 } else if line.ends_with(" ") {
1085 Some(" ")
1086 } else {
1087 None
1088 }
1089 });
1090
1091 // Join and reflow the segment (removing the hard break marker for processing)
1092 let segment_for_reflow: Vec<String> = segment
1093 .iter()
1094 .map(|line| {
1095 // Strip hard break marker (2 spaces or backslash) for reflow processing
1096 if line.ends_with('\\') {
1097 line[..line.len() - 1].trim_end().to_string()
1098 } else if line.ends_with(" ") {
1099 line[..line.len() - 2].trim_end().to_string()
1100 } else {
1101 line.clone()
1102 }
1103 })
1104 .collect();
1105
1106 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1107 if !segment_text.is_empty() {
1108 let reflowed =
1109 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1110
1111 if is_first_block && segment_idx == 0 {
1112 // First segment of first block starts with marker
1113 result.push(format!("{marker}{}", reflowed[0]));
1114 for line in reflowed.iter().skip(1) {
1115 result.push(format!("{expected_indent}{line}"));
1116 }
1117 is_first_block = false;
1118 } else {
1119 // Subsequent segments
1120 for line in reflowed {
1121 result.push(format!("{expected_indent}{line}"));
1122 }
1123 }
1124
1125 // If this segment had a hard break, add it back to the last line
1126 // Preserve the original hard break format (backslash or two spaces)
1127 if let Some(break_marker) = hard_break_type
1128 && let Some(last_line) = result.last_mut()
1129 {
1130 last_line.push_str(break_marker);
1131 }
1132 }
1133 }
1134
1135 // Add blank line after paragraph block if there's a next block
1136 // BUT: check if next block is a code block that doesn't want a preceding blank
1137 if block_idx < blocks.len() - 1 {
1138 let next_block = &blocks[block_idx + 1];
1139 let should_add_blank = match next_block {
1140 Block::Code {
1141 has_preceding_blank, ..
1142 } => *has_preceding_blank,
1143 _ => true, // For all other blocks, add blank line
1144 };
1145 if should_add_blank {
1146 result.push(String::new());
1147 }
1148 }
1149 }
1150 Block::Code {
1151 lines: code_lines,
1152 has_preceding_blank: _,
1153 } => {
1154 // Preserve code blocks as-is with original indentation
1155 // NOTE: Blank line before code block is handled by the previous block
1156 // (see paragraph block's logic above)
1157
1158 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1159 if is_first_block && idx == 0 {
1160 // First line of first block gets marker
1161 result.push(format!(
1162 "{marker}{}",
1163 " ".repeat(orig_indent - marker_len) + content
1164 ));
1165 is_first_block = false;
1166 } else if content.is_empty() {
1167 result.push(String::new());
1168 } else {
1169 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1170 }
1171 }
1172 }
1173 Block::NestedList(nested_items) => {
1174 // Preserve nested list items as-is with original indentation
1175 if !is_first_block {
1176 result.push(String::new());
1177 }
1178
1179 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1180 if is_first_block && idx == 0 {
1181 // First line of first block gets marker
1182 result.push(format!(
1183 "{marker}{}",
1184 " ".repeat(orig_indent - marker_len) + content
1185 ));
1186 is_first_block = false;
1187 } else if content.is_empty() {
1188 result.push(String::new());
1189 } else {
1190 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1191 }
1192 }
1193
1194 // Add blank line after nested list if there's a next block
1195 // Check if next block is a code block that doesn't want a preceding blank
1196 if block_idx < blocks.len() - 1 {
1197 let next_block = &blocks[block_idx + 1];
1198 let should_add_blank = match next_block {
1199 Block::Code {
1200 has_preceding_blank, ..
1201 } => *has_preceding_blank,
1202 _ => true, // For all other blocks, add blank line
1203 };
1204 if should_add_blank {
1205 result.push(String::new());
1206 }
1207 }
1208 }
1209 Block::SemanticLine(content) => {
1210 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1211 // Add blank line before if not first block
1212 if !is_first_block {
1213 result.push(String::new());
1214 }
1215
1216 if is_first_block {
1217 // First block starts with marker
1218 result.push(format!("{marker}{content}"));
1219 is_first_block = false;
1220 } else {
1221 // Subsequent blocks use expected indent
1222 result.push(format!("{expected_indent}{content}"));
1223 }
1224
1225 // Add blank line after semantic line if there's a next block
1226 // Check if next block is a code block that doesn't want a preceding blank
1227 if block_idx < blocks.len() - 1 {
1228 let next_block = &blocks[block_idx + 1];
1229 let should_add_blank = match next_block {
1230 Block::Code {
1231 has_preceding_blank, ..
1232 } => *has_preceding_blank,
1233 _ => true, // For all other blocks, add blank line
1234 };
1235 if should_add_blank {
1236 result.push(String::new());
1237 }
1238 }
1239 }
1240 Block::Html {
1241 lines: html_lines,
1242 has_preceding_blank: _,
1243 } => {
1244 // Preserve HTML blocks exactly as-is with original indentation
1245 // NOTE: Blank line before HTML block is handled by the previous block
1246
1247 for (idx, line) in html_lines.iter().enumerate() {
1248 if is_first_block && idx == 0 {
1249 // First line of first block gets marker
1250 result.push(format!("{marker}{line}"));
1251 is_first_block = false;
1252 } else if line.is_empty() {
1253 // Preserve blank lines inside HTML blocks
1254 result.push(String::new());
1255 } else {
1256 // Preserve lines with their original content (already includes indentation)
1257 result.push(format!("{expected_indent}{line}"));
1258 }
1259 }
1260
1261 // Add blank line after HTML block if there's a next block
1262 if block_idx < blocks.len() - 1 {
1263 let next_block = &blocks[block_idx + 1];
1264 let should_add_blank = match next_block {
1265 Block::Code {
1266 has_preceding_blank, ..
1267 } => *has_preceding_blank,
1268 Block::Html {
1269 has_preceding_blank, ..
1270 } => *has_preceding_blank,
1271 _ => true, // For all other blocks, add blank line
1272 };
1273 if should_add_blank {
1274 result.push(String::new());
1275 }
1276 }
1277 }
1278 }
1279 }
1280
1281 let reflowed_text = result.join("\n");
1282
1283 // Preserve trailing newline
1284 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1285 format!("{reflowed_text}\n")
1286 } else {
1287 reflowed_text
1288 };
1289
1290 // Get the original text to compare
1291 let original_text = &ctx.content[byte_range.clone()];
1292
1293 // Only generate a warning if the replacement is different from the original
1294 if original_text != replacement {
1295 // Generate an appropriate message based on why reflow is needed
1296 let message = match config.reflow_mode {
1297 ReflowMode::SentencePerLine => {
1298 let num_sentences = split_into_sentences(&combined_content).len();
1299 let num_lines = content_lines.len();
1300 if num_lines == 1 {
1301 // Single line with multiple sentences
1302 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1303 } else {
1304 // Multiple lines - could be split sentences or mixed
1305 format!(
1306 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1307 )
1308 }
1309 }
1310 ReflowMode::Normalize => {
1311 let combined_length = self.calculate_effective_length(&full_line);
1312 if combined_length > config.line_length.get() {
1313 format!(
1314 "Line length {} exceeds {} characters",
1315 combined_length,
1316 config.line_length.get()
1317 )
1318 } else {
1319 "Multi-line content can be normalized".to_string()
1320 }
1321 }
1322 ReflowMode::Default => {
1323 let combined_length = self.calculate_effective_length(&full_line);
1324 format!(
1325 "Line length {} exceeds {} characters",
1326 combined_length,
1327 config.line_length.get()
1328 )
1329 }
1330 };
1331
1332 warnings.push(LintWarning {
1333 rule_name: Some(self.name().to_string()),
1334 message,
1335 line: list_start + 1,
1336 column: 1,
1337 end_line: end_line + 1,
1338 end_column: lines[end_line].len() + 1,
1339 severity: Severity::Warning,
1340 fix: Some(crate::rule::Fix {
1341 range: byte_range,
1342 replacement,
1343 }),
1344 });
1345 }
1346 }
1347 continue;
1348 }
1349
1350 // Found start of a paragraph - collect all lines in it
1351 let paragraph_start = i;
1352 let mut paragraph_lines = vec![lines[i]];
1353 i += 1;
1354
1355 while i < lines.len() {
1356 let next_line = lines[i];
1357 let next_line_num = i + 1;
1358 let next_trimmed = next_line.trim();
1359
1360 // Stop at paragraph boundaries
1361 if next_trimmed.is_empty()
1362 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1363 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1364 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1365 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1366 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1367 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1368 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1369 || (next_line_num > 0
1370 && next_line_num <= ctx.lines.len()
1371 && ctx.lines[next_line_num - 1].blockquote.is_some())
1372 || next_trimmed.starts_with('#')
1373 || TableUtils::is_potential_table_row(next_line)
1374 || is_list_item(next_trimmed)
1375 || is_horizontal_rule(next_trimmed)
1376 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1377 || is_template_directive_only(next_line)
1378 {
1379 break;
1380 }
1381
1382 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1383 if i > 0 && has_hard_break(lines[i - 1]) {
1384 // Don't include lines after hard breaks in the same paragraph
1385 break;
1386 }
1387
1388 paragraph_lines.push(next_line);
1389 i += 1;
1390 }
1391
1392 // Combine paragraph lines into a single string for processing
1393 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1394 let paragraph_text = paragraph_lines.join(" ");
1395
1396 // Skip reflowing if this paragraph contains definition list items
1397 // Definition lists are multi-line structures that should not be joined
1398 let contains_definition_list = paragraph_lines
1399 .iter()
1400 .any(|line| crate::utils::is_definition_list_item(line));
1401
1402 if contains_definition_list {
1403 // Don't reflow definition lists - skip this paragraph
1404 i = paragraph_start + paragraph_lines.len();
1405 continue;
1406 }
1407
1408 // Check if this paragraph needs reflowing
1409 let needs_reflow = match config.reflow_mode {
1410 ReflowMode::Normalize => {
1411 // In normalize mode, reflow multi-line paragraphs
1412 paragraph_lines.len() > 1
1413 }
1414 ReflowMode::SentencePerLine => {
1415 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1416 // Note: we check the joined text because sentences can span multiple lines
1417 let sentences = split_into_sentences(¶graph_text);
1418
1419 // Always reflow if multiple sentences on one line
1420 if sentences.len() > 1 {
1421 true
1422 } else if paragraph_lines.len() > 1 {
1423 // For single-sentence paragraphs spanning multiple lines:
1424 // Reflow if they COULD fit on one line (respecting line-length constraint)
1425 if config.line_length.is_unlimited() {
1426 // No line-length constraint - always join single sentences
1427 true
1428 } else {
1429 // Only join if it fits within line-length
1430 let effective_length = self.calculate_effective_length(¶graph_text);
1431 effective_length <= config.line_length.get()
1432 }
1433 } else {
1434 false
1435 }
1436 }
1437 ReflowMode::Default => {
1438 // In default mode, only reflow if lines exceed limit
1439 paragraph_lines
1440 .iter()
1441 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1442 }
1443 };
1444
1445 if needs_reflow {
1446 // Calculate byte range for this paragraph
1447 // Use whole_line_range for each line and combine
1448 let start_range = line_index.whole_line_range(paragraph_start + 1);
1449 let end_line = paragraph_start + paragraph_lines.len() - 1;
1450
1451 // For the last line, we want to preserve any trailing newline
1452 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1453 // Last line without trailing newline - use line_text_range
1454 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1455 } else {
1456 // Not the last line or has trailing newline - use whole_line_range
1457 line_index.whole_line_range(end_line + 1)
1458 };
1459
1460 let byte_range = start_range.start..end_range.end;
1461
1462 // Check if the paragraph ends with a hard break and what type
1463 let hard_break_type = paragraph_lines.last().and_then(|line| {
1464 let line = line.strip_suffix('\r').unwrap_or(line);
1465 if line.ends_with('\\') {
1466 Some("\\")
1467 } else if line.ends_with(" ") {
1468 Some(" ")
1469 } else {
1470 None
1471 }
1472 });
1473
1474 // Reflow the paragraph
1475 // When line_length = 0 (no limit), use a very large value for reflow
1476 let reflow_line_length = if config.line_length.is_unlimited() {
1477 usize::MAX
1478 } else {
1479 config.line_length.get()
1480 };
1481 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1482 line_length: reflow_line_length,
1483 break_on_sentences: true,
1484 preserve_breaks: false,
1485 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1486 abbreviations: config.abbreviations.clone(),
1487 };
1488 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1489
1490 // If the original paragraph ended with a hard break, preserve it
1491 // Preserve the original hard break format (backslash or two spaces)
1492 if let Some(break_marker) = hard_break_type
1493 && !reflowed.is_empty()
1494 {
1495 let last_idx = reflowed.len() - 1;
1496 if !has_hard_break(&reflowed[last_idx]) {
1497 reflowed[last_idx].push_str(break_marker);
1498 }
1499 }
1500
1501 let reflowed_text = reflowed.join("\n");
1502
1503 // Preserve trailing newline if the original paragraph had one
1504 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1505 format!("{reflowed_text}\n")
1506 } else {
1507 reflowed_text
1508 };
1509
1510 // Get the original text to compare
1511 let original_text = &ctx.content[byte_range.clone()];
1512
1513 // Only generate a warning if the replacement is different from the original
1514 if original_text != replacement {
1515 // Create warning with actual fix
1516 // In default mode, report the specific line that violates
1517 // In normalize mode, report the whole paragraph
1518 // In sentence-per-line mode, report the entire paragraph
1519 let (warning_line, warning_end_line) = match config.reflow_mode {
1520 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1521 ReflowMode::SentencePerLine => {
1522 // Highlight the entire paragraph that needs reformatting
1523 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1524 }
1525 ReflowMode::Default => {
1526 // Find the first line that exceeds the limit
1527 let mut violating_line = paragraph_start;
1528 for (idx, line) in paragraph_lines.iter().enumerate() {
1529 if self.calculate_effective_length(line) > config.line_length.get() {
1530 violating_line = paragraph_start + idx;
1531 break;
1532 }
1533 }
1534 (violating_line + 1, violating_line + 1)
1535 }
1536 };
1537
1538 warnings.push(LintWarning {
1539 rule_name: Some(self.name().to_string()),
1540 message: match config.reflow_mode {
1541 ReflowMode::Normalize => format!(
1542 "Paragraph could be normalized to use line length of {} characters",
1543 config.line_length.get()
1544 ),
1545 ReflowMode::SentencePerLine => {
1546 let num_sentences = split_into_sentences(¶graph_text).len();
1547 if paragraph_lines.len() == 1 {
1548 // Single line with multiple sentences
1549 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1550 } else {
1551 let num_lines = paragraph_lines.len();
1552 // Multiple lines - could be split sentences or mixed
1553 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1554 }
1555 },
1556 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1557 },
1558 line: warning_line,
1559 column: 1,
1560 end_line: warning_end_line,
1561 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1562 severity: Severity::Warning,
1563 fix: Some(crate::rule::Fix {
1564 range: byte_range,
1565 replacement,
1566 }),
1567 });
1568 }
1569 }
1570 }
1571
1572 warnings
1573 }
1574
1575 /// Calculate string length based on the configured length mode
1576 fn calculate_string_length(&self, s: &str) -> usize {
1577 match self.config.length_mode {
1578 LengthMode::Chars => s.chars().count(),
1579 LengthMode::Visual => s.width(),
1580 LengthMode::Bytes => s.len(),
1581 }
1582 }
1583
1584 /// Calculate effective line length excluding unbreakable URLs
1585 fn calculate_effective_length(&self, line: &str) -> usize {
1586 if self.config.strict {
1587 // In strict mode, count everything
1588 return self.calculate_string_length(line);
1589 }
1590
1591 // Quick byte-level check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
1592 let bytes = line.as_bytes();
1593 if !bytes.contains(&b'h') && !bytes.contains(&b'[') {
1594 return self.calculate_string_length(line);
1595 }
1596
1597 // More precise check for URLs and links
1598 if !line.contains("http") && !line.contains('[') {
1599 return self.calculate_string_length(line);
1600 }
1601
1602 let mut effective_line = line.to_string();
1603
1604 // First handle markdown links to avoid double-counting URLs
1605 // Pattern: [text](very-long-url) -> [text](url)
1606 if line.contains('[') && line.contains("](") {
1607 for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
1608 if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
1609 && url.as_str().len() > 15
1610 {
1611 let replacement = format!("[{}](url)", text.as_str());
1612 effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
1613 }
1614 }
1615 }
1616
1617 // Then replace bare URLs with a placeholder of reasonable length
1618 // This allows lines with long URLs to pass if the rest of the content is reasonable
1619 if effective_line.contains("http") {
1620 for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
1621 let url = url_match.as_str();
1622 // Skip if this URL is already part of a markdown link we handled
1623 if !effective_line.contains(&format!("({url})")) {
1624 // Replace URL with placeholder that represents a "reasonable" URL length
1625 // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
1626 let placeholder = "x".repeat(15.min(url.len()));
1627 effective_line = effective_line.replacen(url, &placeholder, 1);
1628 }
1629 }
1630 }
1631
1632 self.calculate_string_length(&effective_line)
1633 }
1634}