rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod helpers;
22pub mod md013_config;
23use crate::utils::is_template_directive_only;
24use helpers::{
25 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_list_item,
26 split_into_segments, trim_preserving_hard_break,
27};
28pub use md013_config::MD013Config;
29use md013_config::{LengthMode, ReflowMode};
30
31#[cfg(test)]
32mod tests;
33use unicode_width::UnicodeWidthStr;
34
/// Lint rule MD013: reports lines whose length exceeds the configured limit.
///
/// All behaviour is driven by the wrapped [`MD013Config`]; the type itself holds no
/// other state.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Rule settings: the length limit, the per-element toggles and the reflow options.
    pub(crate) config: MD013Config,
}
39
/// Blockquote paragraph line collected for reflow, with original line index for range computation.
///
/// The index is kept next to the extracted line data so that, once a paragraph has been
/// gathered, the first and last source lines it covers can be turned into a byte range.
struct CollectedBlockquoteLine {
    /// Zero-based index of the line in the document's line slice.
    line_idx: usize,
    /// Per-line data handed to the blockquote reflow helpers.
    data: BlockquoteLineData,
}
45
46impl MD013LineLength {
47 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
48 Self {
49 config: MD013Config {
50 line_length: crate::types::LineLength::new(line_length),
51 code_blocks,
52 tables,
53 headings,
54 paragraphs: true, // Default to true for backwards compatibility
55 strict,
56 reflow: false,
57 reflow_mode: ReflowMode::default(),
58 length_mode: LengthMode::default(),
59 abbreviations: Vec::new(),
60 },
61 }
62 }
63
64 pub fn from_config_struct(config: MD013Config) -> Self {
65 Self { config }
66 }
67
68 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
69 fn reflow_length_mode(&self) -> ReflowLengthMode {
70 match self.config.length_mode {
71 LengthMode::Chars => ReflowLengthMode::Chars,
72 LengthMode::Visual => ReflowLengthMode::Visual,
73 LengthMode::Bytes => ReflowLengthMode::Bytes,
74 }
75 }
76
77 fn should_ignore_line(
78 &self,
79 line: &str,
80 _lines: &[&str],
81 current_line: usize,
82 ctx: &crate::lint_context::LintContext,
83 ) -> bool {
84 if self.config.strict {
85 return false;
86 }
87
88 // Quick check for common patterns before expensive regex
89 let trimmed = line.trim();
90
91 // Only skip if the entire line is a URL (quick check first)
92 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
93 return true;
94 }
95
96 // Only skip if the entire line is an image reference (quick check first)
97 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
98 return true;
99 }
100
101 // Note: link reference definitions are handled as always-exempt (even in strict mode)
102 // in the main check loop, so they don't need to be checked here.
103
104 // Code blocks with long strings (only check if in code block)
105 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
106 && !trimmed.is_empty()
107 && !line.contains(' ')
108 && !line.contains('\t')
109 {
110 return true;
111 }
112
113 false
114 }
115
116 /// Check if rule should skip based on provided config (used for inline config support)
117 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
118 // Skip if content is empty
119 if ctx.content.is_empty() {
120 return true;
121 }
122
123 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
124 if config.reflow
125 && (config.reflow_mode == ReflowMode::SentencePerLine
126 || config.reflow_mode == ReflowMode::SemanticLineBreaks
127 || config.reflow_mode == ReflowMode::Normalize)
128 {
129 return false;
130 }
131
132 // Quick check: if total content is shorter than line limit, definitely skip
133 if ctx.content.len() <= config.line_length.get() {
134 return true;
135 }
136
137 // Skip if no line exceeds the limit
138 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
139 }
140}
141
142impl Rule for MD013LineLength {
143 fn name(&self) -> &'static str {
144 "MD013"
145 }
146
147 fn description(&self) -> &'static str {
148 "Line length should not be excessive"
149 }
150
151 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
152 // Use pre-parsed inline config from LintContext
153 let config_override = ctx.inline_config().get_rule_config("MD013");
154
155 // Apply configuration override if present
156 let effective_config = if let Some(json_config) = config_override {
157 if let Some(obj) = json_config.as_object() {
158 let mut config = self.config.clone();
159 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
160 config.line_length = crate::types::LineLength::new(line_length as usize);
161 }
162 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
163 config.code_blocks = code_blocks;
164 }
165 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
166 config.tables = tables;
167 }
168 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
169 config.headings = headings;
170 }
171 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
172 config.strict = strict;
173 }
174 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
175 config.reflow = reflow;
176 }
177 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
178 config.reflow_mode = match reflow_mode {
179 "default" => ReflowMode::Default,
180 "normalize" => ReflowMode::Normalize,
181 "sentence-per-line" => ReflowMode::SentencePerLine,
182 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
183 _ => ReflowMode::default(),
184 };
185 }
186 config
187 } else {
188 self.config.clone()
189 }
190 } else {
191 self.config.clone()
192 };
193
194 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
195 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
196 if self.should_skip_with_config(ctx, &effective_config)
197 && !(effective_config.reflow
198 && (effective_config.reflow_mode == ReflowMode::Normalize
199 || effective_config.reflow_mode == ReflowMode::SentencePerLine
200 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
201 {
202 return Ok(Vec::new());
203 }
204
205 // Direct implementation without DocumentStructure
206 let mut warnings = Vec::new();
207
208 // Special handling: line_length = 0 means "no line length limit"
209 // Skip all line length checks, but still allow reflow if enabled
210 let skip_length_checks = effective_config.line_length.is_unlimited();
211
212 // Pre-filter lines that could be problematic to avoid processing all lines
213 let mut candidate_lines = Vec::new();
214 if !skip_length_checks {
215 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
216 // Skip front matter - it should never be linted
217 if line_info.in_front_matter {
218 continue;
219 }
220
221 // Quick length check first
222 if line_info.byte_len > effective_config.line_length.get() {
223 candidate_lines.push(line_idx);
224 }
225 }
226 }
227
228 // If no candidate lines and not in normalize or sentence-per-line mode, early return
229 if candidate_lines.is_empty()
230 && !(effective_config.reflow
231 && (effective_config.reflow_mode == ReflowMode::Normalize
232 || effective_config.reflow_mode == ReflowMode::SentencePerLine
233 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
234 {
235 return Ok(warnings);
236 }
237
238 let lines = ctx.raw_lines();
239
240 // Create a quick lookup set for heading lines
241 // We need this for both the heading skip check AND the paragraphs check
242 let heading_lines_set: std::collections::HashSet<usize> = ctx
243 .lines
244 .iter()
245 .enumerate()
246 .filter(|(_, line)| line.heading.is_some())
247 .map(|(idx, _)| idx + 1)
248 .collect();
249
250 // Use pre-computed table blocks from context
251 // We need this for both the table skip check AND the paragraphs check
252 let table_blocks = &ctx.table_blocks;
253 let mut table_lines_set = std::collections::HashSet::new();
254 for table in table_blocks {
255 table_lines_set.insert(table.header_line + 1);
256 table_lines_set.insert(table.delimiter_line + 1);
257 for &line in &table.content_lines {
258 table_lines_set.insert(line + 1);
259 }
260 }
261
262 // Process candidate lines for line length checks
263 for &line_idx in &candidate_lines {
264 let line_number = line_idx + 1;
265 let line = lines[line_idx];
266
267 // Calculate actual line length (used in warning messages)
268 let effective_length = self.calculate_effective_length(line);
269
270 // Use single line length limit for all content
271 let line_limit = effective_config.line_length.get();
272
273 // In non-strict mode, forgive the trailing non-whitespace run.
274 // If the line only exceeds the limit because of a long token at the end
275 // (URL, link chain, identifier), it passes. This matches markdownlint's
276 // behavior: line.replace(/\S*$/u, "#")
277 let check_length = if effective_config.strict {
278 effective_length
279 } else {
280 match line.rfind(char::is_whitespace) {
281 Some(pos) => {
282 let ws_char = line[pos..].chars().next().unwrap();
283 let prefix_end = pos + ws_char.len_utf8();
284 self.calculate_string_length(&line[..prefix_end]) + 1
285 }
286 None => 1, // No whitespace — entire line is a single token
287 }
288 };
289
290 // Skip lines where the check length is within the limit
291 if check_length <= line_limit {
292 continue;
293 }
294
295 // Semantic link understanding: suppress when excess comes entirely from inline URLs
296 if !effective_config.strict {
297 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
298 if text_only_length <= line_limit {
299 continue;
300 }
301 }
302
303 // Skip mkdocstrings blocks (already handled by LintContext)
304 if ctx.lines[line_idx].in_mkdocstrings {
305 continue;
306 }
307
308 // Link reference definitions are always exempt, even in strict mode.
309 // There's no way to shorten them without breaking the URL.
310 {
311 let trimmed = line.trim();
312 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
313 continue;
314 }
315 }
316
317 // Skip various block types efficiently
318 if !effective_config.strict {
319 // Skip setext heading underlines
320 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
321 continue;
322 }
323
324 // Skip block elements according to config flags
325 // The flags mean: true = check these elements, false = skip these elements
326 // So we skip when the flag is FALSE and the line is in that element type
327 if (!effective_config.headings && heading_lines_set.contains(&line_number))
328 || (!effective_config.code_blocks
329 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
330 || (!effective_config.tables && table_lines_set.contains(&line_number))
331 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
332 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
333 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
334 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
335 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
336 {
337 continue;
338 }
339
340 // Check if this is a paragraph/regular text line
341 // If paragraphs = false, skip lines that are NOT in special blocks
342 if !effective_config.paragraphs {
343 let is_special_block = heading_lines_set.contains(&line_number)
344 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
345 || table_lines_set.contains(&line_number)
346 || ctx.lines[line_number - 1].blockquote.is_some()
347 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
348 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
349 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
350 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
351 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
352 || ctx
353 .line_info(line_number)
354 .is_some_and(|info| info.in_mkdocs_container());
355
356 // Skip regular paragraph text when paragraphs = false
357 if !is_special_block {
358 continue;
359 }
360 }
361
362 // Skip lines that are only a URL, image ref, or link ref
363 if self.should_ignore_line(line, lines, line_idx, ctx) {
364 continue;
365 }
366 }
367
368 // In sentence-per-line mode, check if this is a single long sentence
369 // If so, emit a warning without a fix (user must manually rephrase)
370 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
371 let sentences = split_into_sentences(line.trim());
372 if sentences.len() == 1 {
373 // Single sentence that's too long - warn but don't auto-fix
374 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
375
376 let (start_line, start_col, end_line, end_col) =
377 calculate_excess_range(line_number, line, line_limit);
378
379 warnings.push(LintWarning {
380 rule_name: Some(self.name().to_string()),
381 message,
382 line: start_line,
383 column: start_col,
384 end_line,
385 end_column: end_col,
386 severity: Severity::Warning,
387 fix: None, // No auto-fix for long single sentences
388 });
389 continue;
390 }
391 // Multiple sentences will be handled by paragraph-based reflow
392 continue;
393 }
394
395 // In semantic-line-breaks mode, skip per-line checks —
396 // all reflow is handled at the paragraph level with cascading splits
397 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
398 continue;
399 }
400
401 // Don't provide fix for individual lines when reflow is enabled
402 // Paragraph-based fixes will be handled separately
403 let fix = None;
404
405 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
406
407 // Calculate precise character range for the excess portion
408 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
409
410 warnings.push(LintWarning {
411 rule_name: Some(self.name().to_string()),
412 message,
413 line: start_line,
414 column: start_col,
415 end_line,
416 end_column: end_col,
417 severity: Severity::Warning,
418 fix,
419 });
420 }
421
422 // If reflow is enabled, generate paragraph-based fixes
423 if effective_config.reflow {
424 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
425 // Merge paragraph warnings with line warnings, removing duplicates
426 for pw in paragraph_warnings {
427 // Remove any line warnings that overlap with this paragraph
428 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
429 warnings.push(pw);
430 }
431 }
432
433 Ok(warnings)
434 }
435
436 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
437 // For CLI usage, apply fixes from warnings
438 // LSP will use the warning-based fixes directly
439 let warnings = self.check(ctx)?;
440
441 // If there are no fixes, return content unchanged
442 if !warnings.iter().any(|w| w.fix.is_some()) {
443 return Ok(ctx.content.to_string());
444 }
445
446 // Apply warning-based fixes
447 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
448 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
449 }
450
451 fn as_any(&self) -> &dyn std::any::Any {
452 self
453 }
454
455 fn category(&self) -> RuleCategory {
456 RuleCategory::Whitespace
457 }
458
459 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
460 self.should_skip_with_config(ctx, &self.config)
461 }
462
463 fn default_config_section(&self) -> Option<(String, toml::Value)> {
464 let default_config = MD013Config::default();
465 let json_value = serde_json::to_value(&default_config).ok()?;
466 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
467
468 if let toml::Value::Table(table) = toml_value {
469 if !table.is_empty() {
470 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
471 } else {
472 None
473 }
474 } else {
475 None
476 }
477 }
478
479 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
480 let mut aliases = std::collections::HashMap::new();
481 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
482 Some(aliases)
483 }
484
485 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
486 where
487 Self: Sized,
488 {
489 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
490 // Use global line_length if rule-specific config still has default value
491 if rule_config.line_length.get() == 80 {
492 rule_config.line_length = config.global.line_length;
493 }
494 Box::new(Self::from_config_struct(rule_config))
495 }
496}
497
498impl MD013LineLength {
499 fn is_blockquote_content_boundary(
500 &self,
501 content: &str,
502 line_num: usize,
503 ctx: &crate::lint_context::LintContext,
504 ) -> bool {
505 let trimmed = content.trim();
506
507 trimmed.is_empty()
508 || ctx.line_info(line_num).is_some_and(|info| {
509 info.in_code_block
510 || info.in_front_matter
511 || info.in_html_block
512 || info.in_html_comment
513 || info.in_esm_block
514 || info.in_jsx_expression
515 || info.in_mdx_comment
516 || info.in_mkdocstrings
517 || info.in_mkdocs_container()
518 || info.is_div_marker
519 })
520 || trimmed.starts_with('#')
521 || trimmed.starts_with("```")
522 || trimmed.starts_with("~~~")
523 || trimmed.starts_with('>')
524 || TableUtils::is_potential_table_row(content)
525 || is_list_item(trimmed)
526 || is_horizontal_rule(trimmed)
527 || (trimmed.starts_with('[') && content.contains("]:"))
528 || is_template_directive_only(content)
529 || is_standalone_attr_list(content)
530 || is_snippet_block_delimiter(content)
531 || is_github_alert_marker(trimmed)
532 }
533
534 fn generate_blockquote_paragraph_fix(
535 &self,
536 ctx: &crate::lint_context::LintContext,
537 config: &MD013Config,
538 lines: &[&str],
539 line_index: &LineIndex,
540 start_idx: usize,
541 ) -> (Option<LintWarning>, usize) {
542 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
543 return (None, start_idx + 1);
544 };
545 let target_level = start_bq.nesting_level;
546
547 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
548 let mut i = start_idx;
549
550 while i < lines.len() {
551 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
552 break;
553 }
554
555 let line_num = i + 1;
556 if line_num > ctx.lines.len() {
557 break;
558 }
559
560 if lines[i].trim().is_empty() {
561 break;
562 }
563
564 let line_bq = ctx.lines[i].blockquote.as_deref();
565 if let Some(bq) = line_bq {
566 if bq.nesting_level != target_level {
567 break;
568 }
569
570 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
571 break;
572 }
573
574 collected.push(CollectedBlockquoteLine {
575 line_idx: i,
576 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
577 });
578 i += 1;
579 continue;
580 }
581
582 let lazy_content = lines[i].trim_start();
583 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
584 break;
585 }
586
587 collected.push(CollectedBlockquoteLine {
588 line_idx: i,
589 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
590 });
591 i += 1;
592 }
593
594 if collected.is_empty() {
595 return (None, start_idx + 1);
596 }
597
598 let next_idx = i;
599 let paragraph_start = collected[0].line_idx;
600 let end_line = collected[collected.len() - 1].line_idx;
601 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
602 let paragraph_text = line_data
603 .iter()
604 .map(|d| d.content.as_str())
605 .collect::<Vec<_>>()
606 .join(" ");
607
608 let contains_definition_list = line_data
609 .iter()
610 .any(|d| crate::utils::is_definition_list_item(&d.content));
611 if contains_definition_list {
612 return (None, next_idx);
613 }
614
615 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
616 if contains_snippets {
617 return (None, next_idx);
618 }
619
620 let needs_reflow = match config.reflow_mode {
621 ReflowMode::Normalize => line_data.len() > 1,
622 ReflowMode::SentencePerLine => {
623 let sentences = split_into_sentences(¶graph_text);
624 sentences.len() > 1 || line_data.len() > 1
625 }
626 ReflowMode::SemanticLineBreaks => {
627 let sentences = split_into_sentences(¶graph_text);
628 sentences.len() > 1
629 || line_data.len() > 1
630 || collected
631 .iter()
632 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
633 }
634 ReflowMode::Default => collected
635 .iter()
636 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
637 };
638
639 if !needs_reflow {
640 return (None, next_idx);
641 }
642
643 let fallback_prefix = start_bq.prefix.clone();
644 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
645 let continuation_style = blockquote_continuation_style(&line_data);
646
647 let reflow_line_length = if config.line_length.is_unlimited() {
648 usize::MAX
649 } else {
650 config
651 .line_length
652 .get()
653 .saturating_sub(self.calculate_string_length(&explicit_prefix))
654 .max(1)
655 };
656
657 let reflow_options = crate::utils::text_reflow::ReflowOptions {
658 line_length: reflow_line_length,
659 break_on_sentences: true,
660 preserve_breaks: false,
661 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
662 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
663 abbreviations: config.abbreviations_for_reflow(),
664 length_mode: self.reflow_length_mode(),
665 };
666
667 let reflowed_with_style =
668 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
669
670 if reflowed_with_style.is_empty() {
671 return (None, next_idx);
672 }
673
674 let reflowed_text = reflowed_with_style.join("\n");
675
676 let start_range = line_index.whole_line_range(paragraph_start + 1);
677 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
678 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
679 } else {
680 line_index.whole_line_range(end_line + 1)
681 };
682 let byte_range = start_range.start..end_range.end;
683
684 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
685 format!("{reflowed_text}\n")
686 } else {
687 reflowed_text
688 };
689
690 let original_text = &ctx.content[byte_range.clone()];
691 if original_text == replacement {
692 return (None, next_idx);
693 }
694
695 let (warning_line, warning_end_line) = match config.reflow_mode {
696 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
697 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
698 ReflowMode::Default => {
699 let violating_line = collected
700 .iter()
701 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
702 .map(|line| line.line_idx + 1)
703 .unwrap_or(paragraph_start + 1);
704 (violating_line, violating_line)
705 }
706 };
707
708 let warning = LintWarning {
709 rule_name: Some(self.name().to_string()),
710 message: match config.reflow_mode {
711 ReflowMode::Normalize => format!(
712 "Paragraph could be normalized to use line length of {} characters",
713 config.line_length.get()
714 ),
715 ReflowMode::SentencePerLine => {
716 let num_sentences = split_into_sentences(¶graph_text).len();
717 if line_data.len() == 1 {
718 format!("Line contains {num_sentences} sentences (one sentence per line required)")
719 } else {
720 let num_lines = line_data.len();
721 format!(
722 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
723 )
724 }
725 }
726 ReflowMode::SemanticLineBreaks => {
727 let num_sentences = split_into_sentences(¶graph_text).len();
728 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
729 }
730 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
731 },
732 line: warning_line,
733 column: 1,
734 end_line: warning_end_line,
735 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
736 severity: Severity::Warning,
737 fix: Some(crate::rule::Fix {
738 range: byte_range,
739 replacement,
740 }),
741 };
742
743 (Some(warning), next_idx)
744 }
745
746 /// Generate paragraph-based fixes
747 fn generate_paragraph_fixes(
748 &self,
749 ctx: &crate::lint_context::LintContext,
750 config: &MD013Config,
751 lines: &[&str],
752 ) -> Vec<LintWarning> {
753 let mut warnings = Vec::new();
754 let line_index = LineIndex::new(ctx.content);
755
756 let mut i = 0;
757 while i < lines.len() {
758 let line_num = i + 1;
759
760 // Handle blockquote paragraphs with style-preserving reflow.
761 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
762 let (warning, next_idx) = self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i);
763 if let Some(warning) = warning {
764 warnings.push(warning);
765 }
766 i = next_idx;
767 continue;
768 }
769
770 // Skip special structures (but NOT MkDocs containers - those get special handling)
771 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
772 info.in_code_block
773 || info.in_front_matter
774 || info.in_html_block
775 || info.in_html_comment
776 || info.in_esm_block
777 || info.in_jsx_expression
778 || info.in_mdx_comment
779 || info.in_mkdocstrings
780 });
781
782 if should_skip_due_to_line_info
783 || lines[i].trim().starts_with('#')
784 || TableUtils::is_potential_table_row(lines[i])
785 || lines[i].trim().is_empty()
786 || is_horizontal_rule(lines[i].trim())
787 || is_template_directive_only(lines[i])
788 || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
789 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
790 {
791 i += 1;
792 continue;
793 }
794
795 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
796 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
797 // Skip admonition/tab marker lines — only reflow their indented content
798 let current_line = lines[i];
799 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
800 i += 1;
801 continue;
802 }
803
804 let container_start = i;
805
806 // Detect the actual indent level from the first content line
807 // (supports nested admonitions with 8+ spaces)
808 let first_line = lines[i];
809 let base_indent_len = first_line.len() - first_line.trim_start().len();
810 let base_indent: String = " ".repeat(base_indent_len);
811
812 // Collect consecutive MkDocs container paragraph lines
813 let mut container_lines: Vec<&str> = Vec::new();
814 while i < lines.len() {
815 let current_line_num = i + 1;
816 let line_info = ctx.line_info(current_line_num);
817
818 // Stop if we leave the MkDocs container
819 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
820 break;
821 }
822
823 let line = lines[i];
824
825 // Stop at paragraph boundaries within the container
826 if line.trim().is_empty() {
827 break;
828 }
829
830 // Skip list items, code blocks, headings within containers
831 if is_list_item(line.trim())
832 || line.trim().starts_with("```")
833 || line.trim().starts_with("~~~")
834 || line.trim().starts_with('#')
835 {
836 break;
837 }
838
839 container_lines.push(line);
840 i += 1;
841 }
842
843 if container_lines.is_empty() {
844 // Must advance i to avoid infinite loop when we encounter
845 // non-paragraph content (code block, list, heading, empty line)
846 // at the start of an MkDocs container
847 i += 1;
848 continue;
849 }
850
851 // Strip the base indent from each line and join for reflow
852 let stripped_lines: Vec<&str> = container_lines
853 .iter()
854 .map(|line| {
855 if line.starts_with(&base_indent) {
856 &line[base_indent_len..]
857 } else {
858 line.trim_start()
859 }
860 })
861 .collect();
862 let paragraph_text = stripped_lines.join(" ");
863
864 // Check if reflow is needed
865 let needs_reflow = match config.reflow_mode {
866 ReflowMode::Normalize => container_lines.len() > 1,
867 ReflowMode::SentencePerLine => {
868 let sentences = split_into_sentences(¶graph_text);
869 sentences.len() > 1 || container_lines.len() > 1
870 }
871 ReflowMode::SemanticLineBreaks => {
872 let sentences = split_into_sentences(¶graph_text);
873 sentences.len() > 1
874 || container_lines.len() > 1
875 || container_lines
876 .iter()
877 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
878 }
879 ReflowMode::Default => container_lines
880 .iter()
881 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
882 };
883
884 if !needs_reflow {
885 continue;
886 }
887
888 // Calculate byte range for this container paragraph
889 let start_range = line_index.whole_line_range(container_start + 1);
890 let end_line = container_start + container_lines.len() - 1;
891 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
892 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
893 } else {
894 line_index.whole_line_range(end_line + 1)
895 };
896 let byte_range = start_range.start..end_range.end;
897
898 // Reflow with adjusted line length (accounting for the 4-space indent)
899 let reflow_line_length = if config.line_length.is_unlimited() {
900 usize::MAX
901 } else {
902 config.line_length.get().saturating_sub(base_indent_len).max(1)
903 };
904 let reflow_options = crate::utils::text_reflow::ReflowOptions {
905 line_length: reflow_line_length,
906 break_on_sentences: true,
907 preserve_breaks: false,
908 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
909 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
910 abbreviations: config.abbreviations_for_reflow(),
911 length_mode: self.reflow_length_mode(),
912 };
913 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
914
915 // Re-add the 4-space indent to each reflowed line
916 let reflowed_with_indent: Vec<String> =
917 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
918 let reflowed_text = reflowed_with_indent.join("\n");
919
920 // Preserve trailing newline
921 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
922 format!("{reflowed_text}\n")
923 } else {
924 reflowed_text
925 };
926
927 // Only generate a warning if the replacement is different
928 let original_text = &ctx.content[byte_range.clone()];
929 if original_text != replacement {
930 warnings.push(LintWarning {
931 rule_name: Some(self.name().to_string()),
932 message: format!(
933 "Line length {} exceeds {} characters (in MkDocs container)",
934 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
935 config.line_length.get()
936 ),
937 line: container_start + 1,
938 column: 1,
939 end_line: end_line + 1,
940 end_column: lines[end_line].len() + 1,
941 severity: Severity::Warning,
942 fix: Some(crate::rule::Fix {
943 range: byte_range,
944 replacement,
945 }),
946 });
947 }
948 continue;
949 }
950
/// Returns `true` when `content`, ignoring leading whitespace, begins with an
/// admonition-style label such as `NOTE:` or `WARNING:`.
///
/// Lines recognised here are classified as semantic lines so that they stay
/// on their own line instead of being merged into the surrounding paragraph.
/// Matching is case-sensitive and anchored at the start of the line.
///
/// This is a nested `fn` rather than a closure because it captures nothing;
/// the marker table lives in a `const` instead of being rebuilt per call.
fn is_semantic_line(content: &str) -> bool {
    const SEMANTIC_MARKERS: [&str; 8] = [
        "NOTE:",
        "WARNING:",
        "IMPORTANT:",
        "CAUTION:",
        "TIP:",
        "DANGER:",
        "HINT:",
        "INFO:",
    ];

    let trimmed = content.trim_start();
    SEMANTIC_MARKERS.iter().any(|marker| trimmed.starts_with(marker))
}
966
/// Returns `true` when `content`, ignoring leading whitespace, starts with a
/// code fence (three backticks or three tildes).
///
/// Both opening and closing fences match; anything after the fence characters
/// (such as an info string) is ignored.
///
/// This is a nested `fn` rather than a closure because it captures nothing.
fn is_fence_marker(content: &str) -> bool {
    let trimmed = content.trim_start();
    ["```", "~~~"].iter().any(|fence| trimmed.starts_with(fence))
}
972
973 // Check if this is a list item - handle it specially
974 let trimmed = lines[i].trim();
975 if is_list_item(trimmed) {
976 // Collect the entire list item including continuation lines
977 let list_start = i;
978 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
979 let marker_len = marker.len();
980
/// Classification of a single line collected for a list item: either the
/// content of the marker line or one of its continuation lines.
#[derive(Clone)]
enum LineType {
    /// Ordinary text that may be merged with neighbouring text and reflowed.
    Content(String),
    /// A code line: its text plus the original indent width of the line.
    CodeBlock(String, usize),
    /// A nested list item: its text (starting at the marker) plus the
    /// original indent width of the line.
    NestedListItem(String, usize),
    /// A line starting with `NOTE:`, `WARNING:`, etc. that must stay separate.
    SemanticLine(String),
    /// A MkDocs Snippets delimiter (`-8<-`) that must stay on its own line.
    SnippetLine(String),
    /// A Quarto/Pandoc div marker (`:::`, opening or closing).
    DivMarker(String),
    /// A blank line inside the list item.
    Empty,
}
992
993 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
994 i += 1;
995
996 // Collect continuation lines using ctx.lines for metadata
997 while i < lines.len() {
998 let line_info = &ctx.lines[i];
999
1000 // Use pre-computed is_blank from ctx
1001 if line_info.is_blank {
1002 // Empty line - check if next line is indented (part of list item)
1003 if i + 1 < lines.len() {
1004 let next_info = &ctx.lines[i + 1];
1005
1006 // Check if next line is indented enough to be continuation
1007 if !next_info.is_blank && next_info.indent >= marker_len {
1008 // This blank line is between paragraphs/blocks in the list item
1009 list_item_lines.push(LineType::Empty);
1010 i += 1;
1011 continue;
1012 }
1013 }
1014 // No indented line after blank, end of list item
1015 break;
1016 }
1017
1018 // Use pre-computed indent from ctx
1019 let indent = line_info.indent;
1020
1021 // Valid continuation must be indented at least marker_len
1022 if indent >= marker_len {
1023 let trimmed = line_info.content(ctx.content).trim();
1024
1025 // Use pre-computed in_code_block from ctx
1026 if line_info.in_code_block {
1027 list_item_lines.push(LineType::CodeBlock(
1028 line_info.content(ctx.content)[indent..].to_string(),
1029 indent,
1030 ));
1031 i += 1;
1032 continue;
1033 }
1034
1035 // Check if this is a SIBLING list item (breaks parent)
1036 // Nested lists are indented >= marker_len and are PART of the parent item
1037 // Siblings are at indent < marker_len (at or before parent marker)
1038 if is_list_item(trimmed) && indent < marker_len {
1039 // This is a sibling item at same or higher level - end parent item
1040 break;
1041 }
1042
1043 // Check if this is a NESTED list item marker
1044 // Nested lists should be processed separately UNLESS they're part of a
1045 // multi-paragraph list item (indicated by a blank line before them OR
1046 // it's a continuation of an already-started nested list)
1047 if is_list_item(trimmed) && indent >= marker_len {
1048 // Check if there was a blank line before this (multi-paragraph context)
1049 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
1050
1051 // Check if we've already seen nested list content (another nested item)
1052 let has_nested_content = list_item_lines.iter().any(|line| {
1053 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
1054 || matches!(line, LineType::NestedListItem(_, _))
1055 });
1056
1057 if !has_blank_before && !has_nested_content {
1058 // Single-paragraph context with no prior nested items: starts a new item
1059 // End parent collection; nested list will be processed next
1060 break;
1061 }
1062 // else: multi-paragraph context or continuation of nested list, keep collecting
1063 // Mark this as a nested list item to preserve its structure
1064 list_item_lines.push(LineType::NestedListItem(
1065 line_info.content(ctx.content)[indent..].to_string(),
1066 indent,
1067 ));
1068 i += 1;
1069 continue;
1070 }
1071
1072 // Normal continuation: marker_len to marker_len+3
1073 if indent <= marker_len + 3 {
1074 // Extract content (remove indentation and trailing whitespace)
1075 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1076 // See: https://github.com/rvben/rumdl/issues/76
1077 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1078
1079 // Check if this is a div marker (::: opening or closing)
1080 // These must be preserved on their own line, not merged into paragraphs
1081 if line_info.is_div_marker {
1082 list_item_lines.push(LineType::DivMarker(content));
1083 }
1084 // Check if this is a fence marker (opening or closing)
1085 // These should be treated as code block lines, not paragraph content
1086 else if is_fence_marker(&content) {
1087 list_item_lines.push(LineType::CodeBlock(content, indent));
1088 }
1089 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1090 else if is_semantic_line(&content) {
1091 list_item_lines.push(LineType::SemanticLine(content));
1092 }
1093 // Check if this is a snippet block delimiter (-8<- or --8<--)
1094 // These must be preserved on their own lines for MkDocs Snippets extension
1095 else if is_snippet_block_delimiter(&content) {
1096 list_item_lines.push(LineType::SnippetLine(content));
1097 } else {
1098 list_item_lines.push(LineType::Content(content));
1099 }
1100 i += 1;
1101 } else {
1102 // indent >= marker_len + 4: indented code block
1103 list_item_lines.push(LineType::CodeBlock(
1104 line_info.content(ctx.content)[indent..].to_string(),
1105 indent,
1106 ));
1107 i += 1;
1108 }
1109 } else {
1110 // Not indented enough, end of list item
1111 break;
1112 }
1113 }
1114
1115 let indent_size = marker_len;
1116 let expected_indent = " ".repeat(indent_size);
1117
/// A run of consecutive list-item lines that is handled as one unit when the
/// item is rebuilt: paragraphs, code blocks, nested lists, semantic lines,
/// snippet delimiters, div markers and HTML blocks.
#[derive(Clone)]
enum Block {
    /// Consecutive text lines forming one paragraph.
    Paragraph(Vec<String>),
    /// Code lines kept together with their original indentation.
    Code {
        /// `(content, indent)` pairs, one per line.
        lines: Vec<(String, usize)>,
        /// Whether a blank line came directly before this block.
        has_preceding_blank: bool,
    },
    /// Nested list items as `(content, indent)` pairs.
    NestedList(Vec<(String, usize)>),
    /// A semantic marker line (`NOTE:`, `WARNING:`, ...) kept on its own line.
    SemanticLine(String),
    /// A MkDocs Snippets delimiter kept on its own line without extra spacing.
    SnippetLine(String),
    /// A Quarto/Pandoc div marker (`:::`, opening or closing) kept on its own line.
    DivMarker(String),
    /// An HTML block whose lines are preserved as collected.
    Html {
        /// The HTML lines, one entry per source line.
        lines: Vec<String>,
        /// Whether a blank line came directly before this block.
        has_preceding_blank: bool,
    },
}
1135
// HTML tag detection helpers

/// Lowercase names of the HTML elements whose opening tag starts an HTML
/// block inside a list item.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div", "details", "summary", "section", "article", "header", "footer", "nav", "aside", "main",
    "table", "thead", "tbody", "tfoot", "tr", "td", "th",
    "ul", "ol", "li", "dl", "dt", "dd",
    "pre", "blockquote", "figure", "figcaption",
    "form", "fieldset", "legend",
    "hr", "p", "h1", "h2", "h3", "h4", "h5", "h6",
    "style", "script", "noscript",
];

/// Detects whether `line` starts an HTML block.
///
/// Returns:
/// * `Some("!--")` when the trimmed line starts an HTML comment (`<!--`);
/// * `Some(name)` with the lowercased tag name when the trimmed line starts
///   with an opening tag listed in [`BLOCK_LEVEL_TAGS`];
/// * `None` for closing tags, other `<!...>` declarations, tags not in the
///   list, and lines that do not start with `<`.
///
/// The tag name ends at the first whitespace, `>` or `/`, or at the end of
/// the line. Accepting end of line means a tag whose attributes continue on
/// the following line (for example a line containing only `<div`) is still
/// recognised.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let trimmed = line.trim();

    // Comments are tracked with the pseudo tag name "!--".
    if trimmed.starts_with("<!--") {
        return Some("!--".to_string());
    }

    // Closing tags and declarations such as `<!DOCTYPE ...>` never open a block.
    if trimmed.starts_with("</") || trimmed.starts_with("<!") {
        return None;
    }

    let after_bracket = trimmed.strip_prefix('<')?;

    // The name runs up to the first terminator, or to the end of the line
    // when the tag is continued on the next line.
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let tag_name = after_bracket[..name_end].to_lowercase();

    // Only known block-level tags start an HTML block.
    BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()).then_some(tag_name)
}
1205
/// Returns `true` when `line` closes the HTML construct identified by
/// `tag_name`.
///
/// * For the comment pseudo tag `"!--"`, the trimmed line must end with `-->`.
/// * Otherwise the trimmed line must start with `</`, optionally followed by
///   whitespace, followed by `tag_name`.
///
/// The tag name is compared ASCII case-insensitively and must be followed by
/// a character that cannot continue a tag name (or by the end of the line).
/// This keeps `</pre>` from closing an open `p`, and lets `</DIV>` close a
/// `div`.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let Some(after_slash) = trimmed.strip_prefix("</") else {
        return false;
    };
    let candidate = after_slash.trim_start();

    // `get` returns `None` when the line is too short or the cut would fall
    // inside a multi-byte character, so the slice below cannot panic.
    match candidate.get(..tag_name.len()) {
        Some(name) if name.eq_ignore_ascii_case(tag_name) => candidate[tag_name.len()..]
            .chars()
            .next()
            .map_or(true, |next| !(next.is_ascii_alphanumeric() || next == '-')),
        _ => false,
    }
}
1219
/// Returns `true` when the tag on `line` needs no separate closing tag.
///
/// Two cases qualify:
/// * the trimmed line ends with `/>` (explicit self-closing syntax);
/// * the trimmed line starts with a void element such as `<hr>` or
///   `<img ...>`, which has no closing tag in HTML.
///
/// Without the second case a bare `<hr>` would be tracked as an open tag that
/// nothing ever closes.
fn is_self_closing_tag(line: &str) -> bool {
    // Void elements as listed in the HTML Living Standard.
    const VOID_ELEMENTS: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source",
        "track", "wbr",
    ];

    let trimmed = line.trim();
    if trimmed.ends_with("/>") {
        return true;
    }

    let Some(after_bracket) = trimmed.strip_prefix('<') else {
        return false;
    };
    // The tag name ends at the first terminator or at the end of the line.
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let tag_name = after_bracket[..name_end].to_ascii_lowercase();

    VOID_ELEMENTS.contains(&tag_name.as_str())
}
1224
1225 let mut blocks: Vec<Block> = Vec::new();
1226 let mut current_paragraph: Vec<String> = Vec::new();
1227 let mut current_code_block: Vec<(String, usize)> = Vec::new();
1228 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
1229 let mut current_html_block: Vec<String> = Vec::new();
1230 let mut html_tag_stack: Vec<String> = Vec::new();
1231 let mut in_code = false;
1232 let mut in_nested_list = false;
1233 let mut in_html_block = false;
1234 let mut had_preceding_blank = false; // Track if we just saw an empty line
1235 let mut code_block_has_preceding_blank = false; // Track blank before current code block
1236 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
1237
1238 for line in &list_item_lines {
1239 match line {
1240 LineType::Empty => {
1241 if in_code {
1242 current_code_block.push((String::new(), 0));
1243 } else if in_nested_list {
1244 current_nested_list.push((String::new(), 0));
1245 } else if in_html_block {
1246 // Allow blank lines inside HTML blocks
1247 current_html_block.push(String::new());
1248 } else if !current_paragraph.is_empty() {
1249 blocks.push(Block::Paragraph(current_paragraph.clone()));
1250 current_paragraph.clear();
1251 }
1252 // Mark that we saw a blank line
1253 had_preceding_blank = true;
1254 }
1255 LineType::Content(content) => {
1256 // Check if we're currently in an HTML block
1257 if in_html_block {
1258 current_html_block.push(content.clone());
1259
1260 // Check if this line closes any open HTML tags
1261 if let Some(last_tag) = html_tag_stack.last() {
1262 if is_html_closing_tag(content, last_tag) {
1263 html_tag_stack.pop();
1264
1265 // If stack is empty, HTML block is complete
1266 if html_tag_stack.is_empty() {
1267 blocks.push(Block::Html {
1268 lines: current_html_block.clone(),
1269 has_preceding_blank: html_block_has_preceding_blank,
1270 });
1271 current_html_block.clear();
1272 in_html_block = false;
1273 }
1274 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1275 // Nested opening tag within HTML block
1276 if !is_self_closing_tag(content) {
1277 html_tag_stack.push(new_tag);
1278 }
1279 }
1280 }
1281 had_preceding_blank = false;
1282 } else {
1283 // Not in HTML block - check if this line starts one
1284 if let Some(tag_name) = is_block_html_opening_tag(content) {
1285 // Flush current paragraph before starting HTML block
1286 if in_code {
1287 blocks.push(Block::Code {
1288 lines: current_code_block.clone(),
1289 has_preceding_blank: code_block_has_preceding_blank,
1290 });
1291 current_code_block.clear();
1292 in_code = false;
1293 } else if in_nested_list {
1294 blocks.push(Block::NestedList(current_nested_list.clone()));
1295 current_nested_list.clear();
1296 in_nested_list = false;
1297 } else if !current_paragraph.is_empty() {
1298 blocks.push(Block::Paragraph(current_paragraph.clone()));
1299 current_paragraph.clear();
1300 }
1301
1302 // Start new HTML block
1303 in_html_block = true;
1304 html_block_has_preceding_blank = had_preceding_blank;
1305 current_html_block.push(content.clone());
1306
1307 // Check if it's self-closing or needs a closing tag
1308 if is_self_closing_tag(content) {
1309 // Self-closing tag - complete the HTML block immediately
1310 blocks.push(Block::Html {
1311 lines: current_html_block.clone(),
1312 has_preceding_blank: html_block_has_preceding_blank,
1313 });
1314 current_html_block.clear();
1315 in_html_block = false;
1316 } else {
1317 // Regular opening tag - push to stack
1318 html_tag_stack.push(tag_name);
1319 }
1320 } else {
1321 // Regular content line - add to paragraph
1322 if in_code {
1323 // Switching from code to content
1324 blocks.push(Block::Code {
1325 lines: current_code_block.clone(),
1326 has_preceding_blank: code_block_has_preceding_blank,
1327 });
1328 current_code_block.clear();
1329 in_code = false;
1330 } else if in_nested_list {
1331 // Switching from nested list to content
1332 blocks.push(Block::NestedList(current_nested_list.clone()));
1333 current_nested_list.clear();
1334 in_nested_list = false;
1335 }
1336 current_paragraph.push(content.clone());
1337 }
1338 had_preceding_blank = false; // Reset after content
1339 }
1340 }
1341 LineType::CodeBlock(content, indent) => {
1342 if in_nested_list {
1343 // Switching from nested list to code
1344 blocks.push(Block::NestedList(current_nested_list.clone()));
1345 current_nested_list.clear();
1346 in_nested_list = false;
1347 } else if in_html_block {
1348 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1349 blocks.push(Block::Html {
1350 lines: current_html_block.clone(),
1351 has_preceding_blank: html_block_has_preceding_blank,
1352 });
1353 current_html_block.clear();
1354 html_tag_stack.clear();
1355 in_html_block = false;
1356 }
1357 if !in_code {
1358 // Switching from content to code
1359 if !current_paragraph.is_empty() {
1360 blocks.push(Block::Paragraph(current_paragraph.clone()));
1361 current_paragraph.clear();
1362 }
1363 in_code = true;
1364 // Record whether there was a blank line before this code block
1365 code_block_has_preceding_blank = had_preceding_blank;
1366 }
1367 current_code_block.push((content.clone(), *indent));
1368 had_preceding_blank = false; // Reset after code
1369 }
1370 LineType::NestedListItem(content, indent) => {
1371 if in_code {
1372 // Switching from code to nested list
1373 blocks.push(Block::Code {
1374 lines: current_code_block.clone(),
1375 has_preceding_blank: code_block_has_preceding_blank,
1376 });
1377 current_code_block.clear();
1378 in_code = false;
1379 } else if in_html_block {
1380 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1381 blocks.push(Block::Html {
1382 lines: current_html_block.clone(),
1383 has_preceding_blank: html_block_has_preceding_blank,
1384 });
1385 current_html_block.clear();
1386 html_tag_stack.clear();
1387 in_html_block = false;
1388 }
1389 if !in_nested_list {
1390 // Switching from content to nested list
1391 if !current_paragraph.is_empty() {
1392 blocks.push(Block::Paragraph(current_paragraph.clone()));
1393 current_paragraph.clear();
1394 }
1395 in_nested_list = true;
1396 }
1397 current_nested_list.push((content.clone(), *indent));
1398 had_preceding_blank = false; // Reset after nested list
1399 }
1400 LineType::SemanticLine(content) => {
1401 // Semantic lines are standalone - flush any current block and add as separate block
1402 if in_code {
1403 blocks.push(Block::Code {
1404 lines: current_code_block.clone(),
1405 has_preceding_blank: code_block_has_preceding_blank,
1406 });
1407 current_code_block.clear();
1408 in_code = false;
1409 } else if in_nested_list {
1410 blocks.push(Block::NestedList(current_nested_list.clone()));
1411 current_nested_list.clear();
1412 in_nested_list = false;
1413 } else if in_html_block {
1414 blocks.push(Block::Html {
1415 lines: current_html_block.clone(),
1416 has_preceding_blank: html_block_has_preceding_blank,
1417 });
1418 current_html_block.clear();
1419 html_tag_stack.clear();
1420 in_html_block = false;
1421 } else if !current_paragraph.is_empty() {
1422 blocks.push(Block::Paragraph(current_paragraph.clone()));
1423 current_paragraph.clear();
1424 }
1425 // Add semantic line as its own block
1426 blocks.push(Block::SemanticLine(content.clone()));
1427 had_preceding_blank = false; // Reset after semantic line
1428 }
1429 LineType::SnippetLine(content) => {
1430 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1431 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1432 if in_code {
1433 blocks.push(Block::Code {
1434 lines: current_code_block.clone(),
1435 has_preceding_blank: code_block_has_preceding_blank,
1436 });
1437 current_code_block.clear();
1438 in_code = false;
1439 } else if in_nested_list {
1440 blocks.push(Block::NestedList(current_nested_list.clone()));
1441 current_nested_list.clear();
1442 in_nested_list = false;
1443 } else if in_html_block {
1444 blocks.push(Block::Html {
1445 lines: current_html_block.clone(),
1446 has_preceding_blank: html_block_has_preceding_blank,
1447 });
1448 current_html_block.clear();
1449 html_tag_stack.clear();
1450 in_html_block = false;
1451 } else if !current_paragraph.is_empty() {
1452 blocks.push(Block::Paragraph(current_paragraph.clone()));
1453 current_paragraph.clear();
1454 }
1455 // Add snippet line as its own block
1456 blocks.push(Block::SnippetLine(content.clone()));
1457 had_preceding_blank = false;
1458 }
1459 LineType::DivMarker(content) => {
1460 // Div markers (::: opening or closing) are standalone structural delimiters
1461 // Flush any current block and add as separate block
1462 if in_code {
1463 blocks.push(Block::Code {
1464 lines: current_code_block.clone(),
1465 has_preceding_blank: code_block_has_preceding_blank,
1466 });
1467 current_code_block.clear();
1468 in_code = false;
1469 } else if in_nested_list {
1470 blocks.push(Block::NestedList(current_nested_list.clone()));
1471 current_nested_list.clear();
1472 in_nested_list = false;
1473 } else if in_html_block {
1474 blocks.push(Block::Html {
1475 lines: current_html_block.clone(),
1476 has_preceding_blank: html_block_has_preceding_blank,
1477 });
1478 current_html_block.clear();
1479 html_tag_stack.clear();
1480 in_html_block = false;
1481 } else if !current_paragraph.is_empty() {
1482 blocks.push(Block::Paragraph(current_paragraph.clone()));
1483 current_paragraph.clear();
1484 }
1485 blocks.push(Block::DivMarker(content.clone()));
1486 had_preceding_blank = false;
1487 }
1488 }
1489 }
1490
1491 // Push remaining block
1492 if in_code && !current_code_block.is_empty() {
1493 blocks.push(Block::Code {
1494 lines: current_code_block,
1495 has_preceding_blank: code_block_has_preceding_blank,
1496 });
1497 } else if in_nested_list && !current_nested_list.is_empty() {
1498 blocks.push(Block::NestedList(current_nested_list));
1499 } else if in_html_block && !current_html_block.is_empty() {
1500 // If we still have an unclosed HTML block, push it anyway
1501 // (malformed HTML - missing closing tag)
1502 blocks.push(Block::Html {
1503 lines: current_html_block,
1504 has_preceding_blank: html_block_has_preceding_blank,
1505 });
1506 } else if !current_paragraph.is_empty() {
1507 blocks.push(Block::Paragraph(current_paragraph));
1508 }
1509
1510 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1511 let content_lines: Vec<String> = list_item_lines
1512 .iter()
1513 .filter_map(|line| {
1514 if let LineType::Content(s) = line {
1515 Some(s.clone())
1516 } else {
1517 None
1518 }
1519 })
1520 .collect();
1521
1522 // Check if we need to reflow this list item
1523 // We check the combined content to see if it exceeds length limits
1524 let combined_content = content_lines.join(" ").trim().to_string();
1525 let full_line = format!("{marker}{combined_content}");
1526
1527 // Helper to check if we should reflow in normalize mode
1528 let should_normalize = || {
1529 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1530 // DO normalize if it has plain text content that spans multiple lines
1531 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1532 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1533 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1534 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1535 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1536 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1537
1538 // If we have structural blocks but no paragraphs, don't normalize
1539 if (has_nested_lists
1540 || has_code_blocks
1541 || has_semantic_lines
1542 || has_snippet_lines
1543 || has_div_markers)
1544 && !has_paragraphs
1545 {
1546 return false;
1547 }
1548
1549 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1550 if has_paragraphs {
1551 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1552 if paragraph_count > 1 {
1553 // Multiple paragraph blocks should be normalized
1554 return true;
1555 }
1556
1557 // Single paragraph block: normalize if it has multiple content lines
1558 if content_lines.len() > 1 {
1559 return true;
1560 }
1561 }
1562
1563 false
1564 };
1565
1566 let needs_reflow = match config.reflow_mode {
1567 ReflowMode::Normalize => {
1568 // Only reflow if:
1569 // 1. The combined line would exceed the limit, OR
1570 // 2. The list item should be normalized (has multi-line plain text)
1571 let combined_length = self.calculate_effective_length(&full_line);
1572 if combined_length > config.line_length.get() {
1573 true
1574 } else {
1575 should_normalize()
1576 }
1577 }
1578 ReflowMode::SentencePerLine => {
1579 // Check if list item has multiple sentences
1580 let sentences = split_into_sentences(&combined_content);
1581 sentences.len() > 1
1582 }
1583 ReflowMode::SemanticLineBreaks => {
1584 let sentences = split_into_sentences(&combined_content);
1585 sentences.len() > 1
1586 || (list_start..i).any(|line_idx| {
1587 self.calculate_effective_length(lines[line_idx]) > config.line_length.get()
1588 })
1589 }
1590 ReflowMode::Default => {
1591 // In default mode, only reflow if any individual line exceeds limit
1592 (list_start..i)
1593 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1594 }
1595 };
1596
1597 if needs_reflow {
1598 let start_range = line_index.whole_line_range(list_start + 1);
1599 let end_line = i - 1;
1600 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1601 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1602 } else {
1603 line_index.whole_line_range(end_line + 1)
1604 };
1605 let byte_range = start_range.start..end_range.end;
1606
1607 // Reflow each block (paragraphs only, preserve code blocks)
1608 // When line_length = 0 (no limit), use a very large value for reflow
1609 let reflow_line_length = if config.line_length.is_unlimited() {
1610 usize::MAX
1611 } else {
1612 config.line_length.get().saturating_sub(indent_size).max(1)
1613 };
1614 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1615 line_length: reflow_line_length,
1616 break_on_sentences: true,
1617 preserve_breaks: false,
1618 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1619 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1620 abbreviations: config.abbreviations_for_reflow(),
1621 length_mode: self.reflow_length_mode(),
1622 };
1623
1624 let mut result: Vec<String> = Vec::new();
1625 let mut is_first_block = true;
1626
1627 for (block_idx, block) in blocks.iter().enumerate() {
1628 match block {
1629 Block::Paragraph(para_lines) => {
1630 // Split the paragraph into segments at hard break boundaries
1631 // Each segment can be reflowed independently
1632 let segments = split_into_segments(para_lines);
1633
1634 for (segment_idx, segment) in segments.iter().enumerate() {
1635 // Check if this segment ends with a hard break and what type
1636 let hard_break_type = segment.last().and_then(|line| {
1637 let line = line.strip_suffix('\r').unwrap_or(line);
1638 if line.ends_with('\\') {
1639 Some("\\")
1640 } else if line.ends_with(" ") {
1641 Some(" ")
1642 } else {
1643 None
1644 }
1645 });
1646
1647 // Join and reflow the segment (removing the hard break marker for processing)
1648 let segment_for_reflow: Vec<String> = segment
1649 .iter()
1650 .map(|line| {
1651 // Strip hard break marker (2 spaces or backslash) for reflow processing
1652 if line.ends_with('\\') {
1653 line[..line.len() - 1].trim_end().to_string()
1654 } else if line.ends_with(" ") {
1655 line[..line.len() - 2].trim_end().to_string()
1656 } else {
1657 line.clone()
1658 }
1659 })
1660 .collect();
1661
1662 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1663 if !segment_text.is_empty() {
1664 let reflowed =
1665 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1666
1667 if is_first_block && segment_idx == 0 {
1668 // First segment of first block starts with marker
1669 result.push(format!("{marker}{}", reflowed[0]));
1670 for line in reflowed.iter().skip(1) {
1671 result.push(format!("{expected_indent}{line}"));
1672 }
1673 is_first_block = false;
1674 } else {
1675 // Subsequent segments
1676 for line in reflowed {
1677 result.push(format!("{expected_indent}{line}"));
1678 }
1679 }
1680
1681 // If this segment had a hard break, add it back to the last line
1682 // Preserve the original hard break format (backslash or two spaces)
1683 if let Some(break_marker) = hard_break_type
1684 && let Some(last_line) = result.last_mut()
1685 {
1686 last_line.push_str(break_marker);
1687 }
1688 }
1689 }
1690
1691 // Add blank line after paragraph block if there's a next block.
1692 // Check if next block is a code block that doesn't want a preceding blank.
1693 // Also don't add blank lines before snippet lines (they should stay tight).
1694 // Only add if not already ending with one (avoids double blanks).
1695 if block_idx < blocks.len() - 1 {
1696 let next_block = &blocks[block_idx + 1];
1697 let should_add_blank = match next_block {
1698 Block::Code {
1699 has_preceding_blank, ..
1700 } => *has_preceding_blank,
1701 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1702 _ => true, // For all other blocks, add blank line
1703 };
1704 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1705 {
1706 result.push(String::new());
1707 }
1708 }
1709 }
1710 Block::Code {
1711 lines: code_lines,
1712 has_preceding_blank: _,
1713 } => {
1714 // Preserve code blocks as-is with original indentation
1715 // NOTE: Blank line before code block is handled by the previous block
1716 // (see paragraph block's logic above)
1717
1718 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1719 if is_first_block && idx == 0 {
1720 // First line of first block gets marker
1721 result.push(format!(
1722 "{marker}{}",
1723 " ".repeat(orig_indent - marker_len) + content
1724 ));
1725 is_first_block = false;
1726 } else if content.is_empty() {
1727 result.push(String::new());
1728 } else {
1729 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1730 }
1731 }
1732 }
1733 Block::NestedList(nested_items) => {
1734 // Preserve nested list items as-is with original indentation.
1735 // Only add blank before if not already ending with one (avoids
1736 // double blanks when the preceding block already added one).
1737 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1738 result.push(String::new());
1739 }
1740
1741 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1742 if is_first_block && idx == 0 {
1743 // First line of first block gets marker
1744 result.push(format!(
1745 "{marker}{}",
1746 " ".repeat(orig_indent - marker_len) + content
1747 ));
1748 is_first_block = false;
1749 } else if content.is_empty() {
1750 result.push(String::new());
1751 } else {
1752 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1753 }
1754 }
1755
1756 // Add blank line after nested list if there's a next block.
1757 // Only add if not already ending with one (avoids double blanks
1758 // when the last nested item was already a blank line).
1759 if block_idx < blocks.len() - 1 {
1760 let next_block = &blocks[block_idx + 1];
1761 let should_add_blank = match next_block {
1762 Block::Code {
1763 has_preceding_blank, ..
1764 } => *has_preceding_blank,
1765 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1766 _ => true, // For all other blocks, add blank line
1767 };
1768 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1769 {
1770 result.push(String::new());
1771 }
1772 }
1773 }
1774 Block::SemanticLine(content) => {
1775 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
1776 // Only add blank before if not already ending with one.
1777 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1778 result.push(String::new());
1779 }
1780
1781 if is_first_block {
1782 // First block starts with marker
1783 result.push(format!("{marker}{content}"));
1784 is_first_block = false;
1785 } else {
1786 // Subsequent blocks use expected indent
1787 result.push(format!("{expected_indent}{content}"));
1788 }
1789
1790 // Add blank line after semantic line if there's a next block.
1791 // Only add if not already ending with one.
1792 if block_idx < blocks.len() - 1 {
1793 let next_block = &blocks[block_idx + 1];
1794 let should_add_blank = match next_block {
1795 Block::Code {
1796 has_preceding_blank, ..
1797 } => *has_preceding_blank,
1798 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1799 _ => true, // For all other blocks, add blank line
1800 };
1801 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1802 {
1803 result.push(String::new());
1804 }
1805 }
1806 }
1807 Block::SnippetLine(content) => {
1808 // Preserve snippet delimiters (-8<-) as-is on their own line
1809 // Unlike semantic lines, snippet lines don't add extra blank lines
1810 if is_first_block {
1811 // First block starts with marker
1812 result.push(format!("{marker}{content}"));
1813 is_first_block = false;
1814 } else {
1815 // Subsequent blocks use expected indent
1816 result.push(format!("{expected_indent}{content}"));
1817 }
1818 // No blank lines added before or after snippet delimiters
1819 }
1820 Block::DivMarker(content) => {
1821 // Preserve div markers (::: opening or closing) as-is on their own line
1822 if is_first_block {
1823 result.push(format!("{marker}{content}"));
1824 is_first_block = false;
1825 } else {
1826 result.push(format!("{expected_indent}{content}"));
1827 }
1828 }
1829 Block::Html {
1830 lines: html_lines,
1831 has_preceding_blank: _,
1832 } => {
1833 // Preserve HTML blocks exactly as-is with original indentation
1834 // NOTE: Blank line before HTML block is handled by the previous block
1835
1836 for (idx, line) in html_lines.iter().enumerate() {
1837 if is_first_block && idx == 0 {
1838 // First line of first block gets marker
1839 result.push(format!("{marker}{line}"));
1840 is_first_block = false;
1841 } else if line.is_empty() {
1842 // Preserve blank lines inside HTML blocks
1843 result.push(String::new());
1844 } else {
1845 // Preserve lines with their original content (already includes indentation)
1846 result.push(format!("{expected_indent}{line}"));
1847 }
1848 }
1849
1850 // Add blank line after HTML block if there's a next block.
1851 // Only add if not already ending with one (avoids double blanks
1852 // when the HTML block itself contained a trailing blank line).
1853 if block_idx < blocks.len() - 1 {
1854 let next_block = &blocks[block_idx + 1];
1855 let should_add_blank = match next_block {
1856 Block::Code {
1857 has_preceding_blank, ..
1858 } => *has_preceding_blank,
1859 Block::Html {
1860 has_preceding_blank, ..
1861 } => *has_preceding_blank,
1862 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1863 _ => true, // For all other blocks, add blank line
1864 };
1865 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1866 {
1867 result.push(String::new());
1868 }
1869 }
1870 }
1871 }
1872 }
1873
1874 let reflowed_text = result.join("\n");
1875
1876 // Preserve trailing newline
1877 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1878 format!("{reflowed_text}\n")
1879 } else {
1880 reflowed_text
1881 };
1882
1883 // Get the original text to compare
1884 let original_text = &ctx.content[byte_range.clone()];
1885
1886 // Only generate a warning if the replacement is different from the original
1887 if original_text != replacement {
1888 // Generate an appropriate message based on why reflow is needed
1889 let message = match config.reflow_mode {
1890 ReflowMode::SentencePerLine => {
1891 let num_sentences = split_into_sentences(&combined_content).len();
1892 let num_lines = content_lines.len();
1893 if num_lines == 1 {
1894 // Single line with multiple sentences
1895 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1896 } else {
1897 // Multiple lines - could be split sentences or mixed
1898 format!(
1899 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1900 )
1901 }
1902 }
1903 ReflowMode::SemanticLineBreaks => {
1904 let num_sentences = split_into_sentences(&combined_content).len();
1905 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
1906 }
1907 ReflowMode::Normalize => {
1908 let combined_length = self.calculate_effective_length(&full_line);
1909 if combined_length > config.line_length.get() {
1910 format!(
1911 "Line length {} exceeds {} characters",
1912 combined_length,
1913 config.line_length.get()
1914 )
1915 } else {
1916 "Multi-line content can be normalized".to_string()
1917 }
1918 }
1919 ReflowMode::Default => {
1920 let combined_length = self.calculate_effective_length(&full_line);
1921 format!(
1922 "Line length {} exceeds {} characters",
1923 combined_length,
1924 config.line_length.get()
1925 )
1926 }
1927 };
1928
1929 warnings.push(LintWarning {
1930 rule_name: Some(self.name().to_string()),
1931 message,
1932 line: list_start + 1,
1933 column: 1,
1934 end_line: end_line + 1,
1935 end_column: lines[end_line].len() + 1,
1936 severity: Severity::Warning,
1937 fix: Some(crate::rule::Fix {
1938 range: byte_range,
1939 replacement,
1940 }),
1941 });
1942 }
1943 }
1944 continue;
1945 }
1946
1947 // Found start of a paragraph - collect all lines in it
1948 let paragraph_start = i;
1949 let mut paragraph_lines = vec![lines[i]];
1950 i += 1;
1951
1952 while i < lines.len() {
1953 let next_line = lines[i];
1954 let next_line_num = i + 1;
1955 let next_trimmed = next_line.trim();
1956
1957 // Stop at paragraph boundaries
1958 if next_trimmed.is_empty()
1959 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1960 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1961 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1962 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1963 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1964 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1965 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1966 || ctx
1967 .line_info(next_line_num)
1968 .is_some_and(|info| info.in_mkdocs_container())
1969 || (next_line_num > 0
1970 && next_line_num <= ctx.lines.len()
1971 && ctx.lines[next_line_num - 1].blockquote.is_some())
1972 || next_trimmed.starts_with('#')
1973 || TableUtils::is_potential_table_row(next_line)
1974 || is_list_item(next_trimmed)
1975 || is_horizontal_rule(next_trimmed)
1976 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1977 || is_template_directive_only(next_line)
1978 || is_standalone_attr_list(next_line)
1979 || is_snippet_block_delimiter(next_line)
1980 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
1981 {
1982 break;
1983 }
1984
1985 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1986 if i > 0 && has_hard_break(lines[i - 1]) {
1987 // Don't include lines after hard breaks in the same paragraph
1988 break;
1989 }
1990
1991 paragraph_lines.push(next_line);
1992 i += 1;
1993 }
1994
1995 // Combine paragraph lines into a single string for processing
1996 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1997 let paragraph_text = paragraph_lines.join(" ");
1998
1999 // Skip reflowing if this paragraph contains definition list items
2000 // Definition lists are multi-line structures that should not be joined
2001 let contains_definition_list = paragraph_lines
2002 .iter()
2003 .any(|line| crate::utils::is_definition_list_item(line));
2004
2005 if contains_definition_list {
2006 // Don't reflow definition lists - skip this paragraph
2007 i = paragraph_start + paragraph_lines.len();
2008 continue;
2009 }
2010
2011 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2012 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2013 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2014
2015 if contains_snippets {
2016 // Don't reflow Snippets blocks - skip this paragraph
2017 i = paragraph_start + paragraph_lines.len();
2018 continue;
2019 }
2020
2021 // Check if this paragraph needs reflowing
2022 let needs_reflow = match config.reflow_mode {
2023 ReflowMode::Normalize => {
2024 // In normalize mode, reflow multi-line paragraphs
2025 paragraph_lines.len() > 1
2026 }
2027 ReflowMode::SentencePerLine => {
2028 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
2029 // Note: we check the joined text because sentences can span multiple lines
2030 let sentences = split_into_sentences(¶graph_text);
2031
2032 // Always reflow if multiple sentences on one line
2033 if sentences.len() > 1 {
2034 true
2035 } else if paragraph_lines.len() > 1 {
2036 // For single-sentence paragraphs spanning multiple lines:
2037 // Reflow if they COULD fit on one line (respecting line-length constraint)
2038 if config.line_length.is_unlimited() {
2039 // No line-length constraint - always join single sentences
2040 true
2041 } else {
2042 // Only join if it fits within line-length
2043 let effective_length = self.calculate_effective_length(¶graph_text);
2044 effective_length <= config.line_length.get()
2045 }
2046 } else {
2047 false
2048 }
2049 }
2050 ReflowMode::SemanticLineBreaks => {
2051 let sentences = split_into_sentences(¶graph_text);
2052 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
2053 sentences.len() > 1
2054 || paragraph_lines.len() > 1
2055 || paragraph_lines
2056 .iter()
2057 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2058 }
2059 ReflowMode::Default => {
2060 // In default mode, only reflow if lines exceed limit
2061 paragraph_lines
2062 .iter()
2063 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2064 }
2065 };
2066
2067 if needs_reflow {
2068 // Calculate byte range for this paragraph
2069 // Use whole_line_range for each line and combine
2070 let start_range = line_index.whole_line_range(paragraph_start + 1);
2071 let end_line = paragraph_start + paragraph_lines.len() - 1;
2072
2073 // For the last line, we want to preserve any trailing newline
2074 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2075 // Last line without trailing newline - use line_text_range
2076 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2077 } else {
2078 // Not the last line or has trailing newline - use whole_line_range
2079 line_index.whole_line_range(end_line + 1)
2080 };
2081
2082 let byte_range = start_range.start..end_range.end;
2083
2084 // Check if the paragraph ends with a hard break and what type
2085 let hard_break_type = paragraph_lines.last().and_then(|line| {
2086 let line = line.strip_suffix('\r').unwrap_or(line);
2087 if line.ends_with('\\') {
2088 Some("\\")
2089 } else if line.ends_with(" ") {
2090 Some(" ")
2091 } else {
2092 None
2093 }
2094 });
2095
2096 // Reflow the paragraph
2097 // When line_length = 0 (no limit), use a very large value for reflow
2098 let reflow_line_length = if config.line_length.is_unlimited() {
2099 usize::MAX
2100 } else {
2101 config.line_length.get()
2102 };
2103 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2104 line_length: reflow_line_length,
2105 break_on_sentences: true,
2106 preserve_breaks: false,
2107 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2108 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2109 abbreviations: config.abbreviations_for_reflow(),
2110 length_mode: self.reflow_length_mode(),
2111 };
2112 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
2113
2114 // If the original paragraph ended with a hard break, preserve it
2115 // Preserve the original hard break format (backslash or two spaces)
2116 if let Some(break_marker) = hard_break_type
2117 && !reflowed.is_empty()
2118 {
2119 let last_idx = reflowed.len() - 1;
2120 if !has_hard_break(&reflowed[last_idx]) {
2121 reflowed[last_idx].push_str(break_marker);
2122 }
2123 }
2124
2125 let reflowed_text = reflowed.join("\n");
2126
2127 // Preserve trailing newline if the original paragraph had one
2128 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2129 format!("{reflowed_text}\n")
2130 } else {
2131 reflowed_text
2132 };
2133
2134 // Get the original text to compare
2135 let original_text = &ctx.content[byte_range.clone()];
2136
2137 // Only generate a warning if the replacement is different from the original
2138 if original_text != replacement {
2139 // Create warning with actual fix
2140 // In default mode, report the specific line that violates
2141 // In normalize mode, report the whole paragraph
2142 // In sentence-per-line mode, report the entire paragraph
2143 let (warning_line, warning_end_line) = match config.reflow_mode {
2144 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
2145 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
2146 // Highlight the entire paragraph that needs reformatting
2147 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
2148 }
2149 ReflowMode::Default => {
2150 // Find the first line that exceeds the limit
2151 let mut violating_line = paragraph_start;
2152 for (idx, line) in paragraph_lines.iter().enumerate() {
2153 if self.calculate_effective_length(line) > config.line_length.get() {
2154 violating_line = paragraph_start + idx;
2155 break;
2156 }
2157 }
2158 (violating_line + 1, violating_line + 1)
2159 }
2160 };
2161
2162 warnings.push(LintWarning {
2163 rule_name: Some(self.name().to_string()),
2164 message: match config.reflow_mode {
2165 ReflowMode::Normalize => format!(
2166 "Paragraph could be normalized to use line length of {} characters",
2167 config.line_length.get()
2168 ),
2169 ReflowMode::SentencePerLine => {
2170 let num_sentences = split_into_sentences(¶graph_text).len();
2171 if paragraph_lines.len() == 1 {
2172 // Single line with multiple sentences
2173 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2174 } else {
2175 let num_lines = paragraph_lines.len();
2176 // Multiple lines - could be split sentences or mixed
2177 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
2178 }
2179 },
2180 ReflowMode::SemanticLineBreaks => {
2181 let num_sentences = split_into_sentences(¶graph_text).len();
2182 format!(
2183 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
2184 )
2185 },
2186 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
2187 },
2188 line: warning_line,
2189 column: 1,
2190 end_line: warning_end_line,
2191 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
2192 severity: Severity::Warning,
2193 fix: Some(crate::rule::Fix {
2194 range: byte_range,
2195 replacement,
2196 }),
2197 });
2198 }
2199 }
2200 }
2201
2202 warnings
2203 }
2204
2205 /// Calculate string length based on the configured length mode
2206 fn calculate_string_length(&self, s: &str) -> usize {
2207 match self.config.length_mode {
2208 LengthMode::Chars => s.chars().count(),
2209 LengthMode::Visual => s.width(),
2210 LengthMode::Bytes => s.len(),
2211 }
2212 }
2213
2214 /// Calculate effective line length
2215 ///
2216 /// Returns the actual display length of the line using the configured length mode.
2217 fn calculate_effective_length(&self, line: &str) -> usize {
2218 self.calculate_string_length(line)
2219 }
2220
2221 /// Calculate line length with inline link/image URLs removed.
2222 ///
2223 /// For each inline link `[text](url)` or image `` on the line,
2224 /// computes the "savings" from removing the URL portion (keeping only `[text]`
2225 /// or `![alt]`). Returns `effective_length - total_savings`.
2226 ///
2227 /// Handles nested constructs (e.g., `[](url)`) by only counting the
2228 /// outermost construct to avoid double-counting.
2229 fn calculate_text_only_length(
2230 &self,
2231 effective_length: usize,
2232 line_number: usize,
2233 ctx: &crate::lint_context::LintContext,
2234 ) -> usize {
2235 let line_range = ctx.line_index.line_content_range(line_number);
2236 let line_byte_end = line_range.end;
2237
2238 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
2239 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
2240
2241 for link in &ctx.links {
2242 if link.line != line_number || link.is_reference {
2243 continue;
2244 }
2245 if !matches!(link.link_type, LinkType::Inline) {
2246 continue;
2247 }
2248 // Skip cross-line links
2249 if link.byte_end > line_byte_end {
2250 continue;
2251 }
2252 // `[text]` in configured length mode
2253 let text_only_len = 2 + self.calculate_string_length(&link.text);
2254 constructs.push((link.byte_offset, link.byte_end, text_only_len));
2255 }
2256
2257 for image in &ctx.images {
2258 if image.line != line_number || image.is_reference {
2259 continue;
2260 }
2261 if !matches!(image.link_type, LinkType::Inline) {
2262 continue;
2263 }
2264 // Skip cross-line images
2265 if image.byte_end > line_byte_end {
2266 continue;
2267 }
2268 // `![alt]` in configured length mode
2269 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
2270 constructs.push((image.byte_offset, image.byte_end, text_only_len));
2271 }
2272
2273 if constructs.is_empty() {
2274 return effective_length;
2275 }
2276
2277 // Sort by byte offset to handle overlapping/nested constructs
2278 constructs.sort_by_key(|&(start, _, _)| start);
2279
2280 let mut total_savings: usize = 0;
2281 let mut last_end: usize = 0;
2282
2283 for (start, end, text_only_len) in &constructs {
2284 // Skip constructs nested inside a previously counted one
2285 if *start < last_end {
2286 continue;
2287 }
2288 // Full construct length in configured length mode
2289 let full_source = &ctx.content[*start..*end];
2290 let full_len = self.calculate_string_length(full_source);
2291 total_savings += full_len.saturating_sub(*text_only_len);
2292 last_end = *end;
2293 }
2294
2295 effective_length.saturating_sub(total_savings)
2296 }
2297}