rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod helpers;
22pub mod md013_config;
23use crate::utils::is_template_directive_only;
24use helpers::{
25 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_list_item,
26 is_standalone_link_or_image_line, split_into_segments, trim_preserving_hard_break,
27};
28pub use md013_config::MD013Config;
29use md013_config::{LengthMode, ReflowMode};
30
31#[cfg(test)]
32mod tests;
33use unicode_width::UnicodeWidthStr;
34
35#[derive(Clone, Default)]
36pub struct MD013LineLength {
37 pub(crate) config: MD013Config,
38}
39
40/// Blockquote paragraph line collected for reflow, with original line index for range computation.
41struct CollectedBlockquoteLine {
42 line_idx: usize,
43 data: BlockquoteLineData,
44}
45
46impl MD013LineLength {
47 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
48 Self {
49 config: MD013Config {
50 line_length: crate::types::LineLength::new(line_length),
51 code_blocks,
52 tables,
53 headings,
54 paragraphs: true, // Default to true for backwards compatibility
55 strict,
56 reflow: false,
57 reflow_mode: ReflowMode::default(),
58 length_mode: LengthMode::default(),
59 abbreviations: Vec::new(),
60 },
61 }
62 }
63
64 pub fn from_config_struct(config: MD013Config) -> Self {
65 Self { config }
66 }
67
68 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
69 fn reflow_length_mode(&self) -> ReflowLengthMode {
70 match self.config.length_mode {
71 LengthMode::Chars => ReflowLengthMode::Chars,
72 LengthMode::Visual => ReflowLengthMode::Visual,
73 LengthMode::Bytes => ReflowLengthMode::Bytes,
74 }
75 }
76
77 fn should_ignore_line(
78 &self,
79 line: &str,
80 _lines: &[&str],
81 current_line: usize,
82 ctx: &crate::lint_context::LintContext,
83 ) -> bool {
84 if self.config.strict {
85 return false;
86 }
87
88 // Quick check for common patterns before expensive regex
89 let trimmed = line.trim();
90
91 // Only skip if the entire line is a URL (quick check first)
92 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
93 return true;
94 }
95
96 // Only skip if the entire line is an image reference (quick check first)
97 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
98 return true;
99 }
100
101 // Note: link reference definitions are handled as always-exempt (even in strict mode)
102 // in the main check loop, so they don't need to be checked here.
103
104 // Code blocks with long strings (only check if in code block)
105 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
106 && !trimmed.is_empty()
107 && !line.contains(' ')
108 && !line.contains('\t')
109 {
110 return true;
111 }
112
113 false
114 }
115
116 /// Check if rule should skip based on provided config (used for inline config support)
117 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
118 // Skip if content is empty
119 if ctx.content.is_empty() {
120 return true;
121 }
122
123 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
124 if config.reflow
125 && (config.reflow_mode == ReflowMode::SentencePerLine
126 || config.reflow_mode == ReflowMode::SemanticLineBreaks
127 || config.reflow_mode == ReflowMode::Normalize)
128 {
129 return false;
130 }
131
132 // Quick check: if total content is shorter than line limit, definitely skip
133 if ctx.content.len() <= config.line_length.get() {
134 return true;
135 }
136
137 // Skip if no line exceeds the limit
138 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
139 }
140}
141
142impl Rule for MD013LineLength {
143 fn name(&self) -> &'static str {
144 "MD013"
145 }
146
147 fn description(&self) -> &'static str {
148 "Line length should not be excessive"
149 }
150
151 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
152 // Use pre-parsed inline config from LintContext
153 let config_override = ctx.inline_config().get_rule_config("MD013");
154
155 // Apply configuration override if present
156 let effective_config = if let Some(json_config) = config_override {
157 if let Some(obj) = json_config.as_object() {
158 let mut config = self.config.clone();
159 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
160 config.line_length = crate::types::LineLength::new(line_length as usize);
161 }
162 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
163 config.code_blocks = code_blocks;
164 }
165 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
166 config.tables = tables;
167 }
168 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
169 config.headings = headings;
170 }
171 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
172 config.strict = strict;
173 }
174 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
175 config.reflow = reflow;
176 }
177 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
178 config.reflow_mode = match reflow_mode {
179 "default" => ReflowMode::Default,
180 "normalize" => ReflowMode::Normalize,
181 "sentence-per-line" => ReflowMode::SentencePerLine,
182 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
183 _ => ReflowMode::default(),
184 };
185 }
186 config
187 } else {
188 self.config.clone()
189 }
190 } else {
191 self.config.clone()
192 };
193
194 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
195 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
196 if self.should_skip_with_config(ctx, &effective_config)
197 && !(effective_config.reflow
198 && (effective_config.reflow_mode == ReflowMode::Normalize
199 || effective_config.reflow_mode == ReflowMode::SentencePerLine
200 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
201 {
202 return Ok(Vec::new());
203 }
204
205 // Direct implementation without DocumentStructure
206 let mut warnings = Vec::new();
207
208 // Special handling: line_length = 0 means "no line length limit"
209 // Skip all line length checks, but still allow reflow if enabled
210 let skip_length_checks = effective_config.line_length.is_unlimited();
211
212 // Pre-filter lines that could be problematic to avoid processing all lines
213 let mut candidate_lines = Vec::new();
214 if !skip_length_checks {
215 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
216 // Skip front matter - it should never be linted
217 if line_info.in_front_matter {
218 continue;
219 }
220
221 // Quick length check first
222 if line_info.byte_len > effective_config.line_length.get() {
223 candidate_lines.push(line_idx);
224 }
225 }
226 }
227
228 // If no candidate lines and not in normalize or sentence-per-line mode, early return
229 if candidate_lines.is_empty()
230 && !(effective_config.reflow
231 && (effective_config.reflow_mode == ReflowMode::Normalize
232 || effective_config.reflow_mode == ReflowMode::SentencePerLine
233 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
234 {
235 return Ok(warnings);
236 }
237
238 let lines = ctx.raw_lines();
239
240 // Create a quick lookup set for heading lines
241 // We need this for both the heading skip check AND the paragraphs check
242 let heading_lines_set: std::collections::HashSet<usize> = ctx
243 .lines
244 .iter()
245 .enumerate()
246 .filter(|(_, line)| line.heading.is_some())
247 .map(|(idx, _)| idx + 1)
248 .collect();
249
250 // Use pre-computed table blocks from context
251 // We need this for both the table skip check AND the paragraphs check
252 let table_blocks = &ctx.table_blocks;
253 let mut table_lines_set = std::collections::HashSet::new();
254 for table in table_blocks {
255 table_lines_set.insert(table.header_line + 1);
256 table_lines_set.insert(table.delimiter_line + 1);
257 for &line in &table.content_lines {
258 table_lines_set.insert(line + 1);
259 }
260 }
261
262 // Process candidate lines for line length checks
263 for &line_idx in &candidate_lines {
264 let line_number = line_idx + 1;
265 let line = lines[line_idx];
266
267 // Calculate actual line length (used in warning messages)
268 let effective_length = self.calculate_effective_length(line);
269
270 // Use single line length limit for all content
271 let line_limit = effective_config.line_length.get();
272
273 // In non-strict mode, forgive the trailing non-whitespace run.
274 // If the line only exceeds the limit because of a long token at the end
275 // (URL, link chain, identifier), it passes. This matches markdownlint's
276 // behavior: line.replace(/\S*$/u, "#")
277 let check_length = if effective_config.strict {
278 effective_length
279 } else {
280 match line.rfind(char::is_whitespace) {
281 Some(pos) => {
282 let ws_char = line[pos..].chars().next().unwrap();
283 let prefix_end = pos + ws_char.len_utf8();
284 self.calculate_string_length(&line[..prefix_end]) + 1
285 }
286 None => 1, // No whitespace — entire line is a single token
287 }
288 };
289
290 // Skip lines where the check length is within the limit
291 if check_length <= line_limit {
292 continue;
293 }
294
295 // Semantic link understanding: suppress when excess comes entirely from inline URLs
296 if !effective_config.strict {
297 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
298 if text_only_length <= line_limit {
299 continue;
300 }
301 }
302
303 // Skip mkdocstrings blocks (already handled by LintContext)
304 if ctx.lines[line_idx].in_mkdocstrings {
305 continue;
306 }
307
308 // Link reference definitions are always exempt, even in strict mode.
309 // There's no way to shorten them without breaking the URL.
310 {
311 let trimmed = line.trim();
312 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
313 continue;
314 }
315 }
316
317 // Skip various block types efficiently
318 if !effective_config.strict {
319 // Lines whose only content is a link/image are exempt.
320 // After stripping list markers, blockquote markers, and emphasis,
321 // if only a link or image remains, there is no way to shorten it.
322 if is_standalone_link_or_image_line(line) {
323 continue;
324 }
325
326 // Skip setext heading underlines
327 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
328 continue;
329 }
330
331 // Skip block elements according to config flags
332 // The flags mean: true = check these elements, false = skip these elements
333 // So we skip when the flag is FALSE and the line is in that element type
334 if (!effective_config.headings && heading_lines_set.contains(&line_number))
335 || (!effective_config.code_blocks
336 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
337 || (!effective_config.tables && table_lines_set.contains(&line_number))
338 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
339 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
340 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
341 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
342 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
343 {
344 continue;
345 }
346
347 // Check if this is a paragraph/regular text line
348 // If paragraphs = false, skip lines that are NOT in special blocks
349 if !effective_config.paragraphs {
350 let is_special_block = heading_lines_set.contains(&line_number)
351 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
352 || table_lines_set.contains(&line_number)
353 || ctx.lines[line_number - 1].blockquote.is_some()
354 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
355 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
356 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
357 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
358 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
359 || ctx
360 .line_info(line_number)
361 .is_some_and(|info| info.in_mkdocs_container());
362
363 // Skip regular paragraph text when paragraphs = false
364 if !is_special_block {
365 continue;
366 }
367 }
368
369 // Skip lines that are only a URL, image ref, or link ref
370 if self.should_ignore_line(line, lines, line_idx, ctx) {
371 continue;
372 }
373 }
374
375 // In sentence-per-line mode, check if this is a single long sentence
376 // If so, emit a warning without a fix (user must manually rephrase)
377 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
378 let sentences = split_into_sentences(line.trim());
379 if sentences.len() == 1 {
380 // Single sentence that's too long - warn but don't auto-fix
381 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
382
383 let (start_line, start_col, end_line, end_col) =
384 calculate_excess_range(line_number, line, line_limit);
385
386 warnings.push(LintWarning {
387 rule_name: Some(self.name().to_string()),
388 message,
389 line: start_line,
390 column: start_col,
391 end_line,
392 end_column: end_col,
393 severity: Severity::Warning,
394 fix: None, // No auto-fix for long single sentences
395 });
396 continue;
397 }
398 // Multiple sentences will be handled by paragraph-based reflow
399 continue;
400 }
401
402 // In semantic-line-breaks mode, skip per-line checks —
403 // all reflow is handled at the paragraph level with cascading splits
404 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
405 continue;
406 }
407
408 // Don't provide fix for individual lines when reflow is enabled
409 // Paragraph-based fixes will be handled separately
410 let fix = None;
411
412 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
413
414 // Calculate precise character range for the excess portion
415 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
416
417 warnings.push(LintWarning {
418 rule_name: Some(self.name().to_string()),
419 message,
420 line: start_line,
421 column: start_col,
422 end_line,
423 end_column: end_col,
424 severity: Severity::Warning,
425 fix,
426 });
427 }
428
429 // If reflow is enabled, generate paragraph-based fixes
430 if effective_config.reflow {
431 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
432 // Merge paragraph warnings with line warnings, removing duplicates
433 for pw in paragraph_warnings {
434 // Remove any line warnings that overlap with this paragraph
435 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
436 warnings.push(pw);
437 }
438 }
439
440 Ok(warnings)
441 }
442
443 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
444 // For CLI usage, apply fixes from warnings
445 // LSP will use the warning-based fixes directly
446 let warnings = self.check(ctx)?;
447
448 // If there are no fixes, return content unchanged
449 if !warnings.iter().any(|w| w.fix.is_some()) {
450 return Ok(ctx.content.to_string());
451 }
452
453 // Apply warning-based fixes
454 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
455 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
456 }
457
458 fn as_any(&self) -> &dyn std::any::Any {
459 self
460 }
461
462 fn category(&self) -> RuleCategory {
463 RuleCategory::Whitespace
464 }
465
466 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
467 self.should_skip_with_config(ctx, &self.config)
468 }
469
470 fn default_config_section(&self) -> Option<(String, toml::Value)> {
471 let default_config = MD013Config::default();
472 let json_value = serde_json::to_value(&default_config).ok()?;
473 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
474
475 if let toml::Value::Table(table) = toml_value {
476 if !table.is_empty() {
477 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
478 } else {
479 None
480 }
481 } else {
482 None
483 }
484 }
485
486 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
487 let mut aliases = std::collections::HashMap::new();
488 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
489 Some(aliases)
490 }
491
492 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
493 where
494 Self: Sized,
495 {
496 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
497 // Use global line_length if rule-specific config still has default value
498 if rule_config.line_length.get() == 80 {
499 rule_config.line_length = config.global.line_length;
500 }
501 Box::new(Self::from_config_struct(rule_config))
502 }
503}
504
505impl MD013LineLength {
506 fn is_blockquote_content_boundary(
507 &self,
508 content: &str,
509 line_num: usize,
510 ctx: &crate::lint_context::LintContext,
511 ) -> bool {
512 let trimmed = content.trim();
513
514 trimmed.is_empty()
515 || ctx.line_info(line_num).is_some_and(|info| {
516 info.in_code_block
517 || info.in_front_matter
518 || info.in_html_block
519 || info.in_html_comment
520 || info.in_esm_block
521 || info.in_jsx_expression
522 || info.in_mdx_comment
523 || info.in_mkdocstrings
524 || info.in_mkdocs_container()
525 || info.is_div_marker
526 })
527 || trimmed.starts_with('#')
528 || trimmed.starts_with("```")
529 || trimmed.starts_with("~~~")
530 || trimmed.starts_with('>')
531 || TableUtils::is_potential_table_row(content)
532 || is_list_item(trimmed)
533 || is_horizontal_rule(trimmed)
534 || (trimmed.starts_with('[') && content.contains("]:"))
535 || is_template_directive_only(content)
536 || is_standalone_attr_list(content)
537 || is_snippet_block_delimiter(content)
538 || is_github_alert_marker(trimmed)
539 }
540
541 fn generate_blockquote_paragraph_fix(
542 &self,
543 ctx: &crate::lint_context::LintContext,
544 config: &MD013Config,
545 lines: &[&str],
546 line_index: &LineIndex,
547 start_idx: usize,
548 ) -> (Option<LintWarning>, usize) {
549 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
550 return (None, start_idx + 1);
551 };
552 let target_level = start_bq.nesting_level;
553
554 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
555 let mut i = start_idx;
556
557 while i < lines.len() {
558 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
559 break;
560 }
561
562 let line_num = i + 1;
563 if line_num > ctx.lines.len() {
564 break;
565 }
566
567 if lines[i].trim().is_empty() {
568 break;
569 }
570
571 let line_bq = ctx.lines[i].blockquote.as_deref();
572 if let Some(bq) = line_bq {
573 if bq.nesting_level != target_level {
574 break;
575 }
576
577 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
578 break;
579 }
580
581 collected.push(CollectedBlockquoteLine {
582 line_idx: i,
583 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
584 });
585 i += 1;
586 continue;
587 }
588
589 let lazy_content = lines[i].trim_start();
590 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
591 break;
592 }
593
594 collected.push(CollectedBlockquoteLine {
595 line_idx: i,
596 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
597 });
598 i += 1;
599 }
600
601 if collected.is_empty() {
602 return (None, start_idx + 1);
603 }
604
605 let next_idx = i;
606 let paragraph_start = collected[0].line_idx;
607 let end_line = collected[collected.len() - 1].line_idx;
608 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
609 let paragraph_text = line_data
610 .iter()
611 .map(|d| d.content.as_str())
612 .collect::<Vec<_>>()
613 .join(" ");
614
615 let contains_definition_list = line_data
616 .iter()
617 .any(|d| crate::utils::is_definition_list_item(&d.content));
618 if contains_definition_list {
619 return (None, next_idx);
620 }
621
622 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
623 if contains_snippets {
624 return (None, next_idx);
625 }
626
627 let needs_reflow = match config.reflow_mode {
628 ReflowMode::Normalize => line_data.len() > 1,
629 ReflowMode::SentencePerLine => {
630 let sentences = split_into_sentences(¶graph_text);
631 sentences.len() > 1 || line_data.len() > 1
632 }
633 ReflowMode::SemanticLineBreaks => {
634 let sentences = split_into_sentences(¶graph_text);
635 sentences.len() > 1
636 || line_data.len() > 1
637 || collected
638 .iter()
639 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
640 }
641 ReflowMode::Default => collected
642 .iter()
643 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
644 };
645
646 if !needs_reflow {
647 return (None, next_idx);
648 }
649
650 let fallback_prefix = start_bq.prefix.clone();
651 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
652 let continuation_style = blockquote_continuation_style(&line_data);
653
654 let reflow_line_length = if config.line_length.is_unlimited() {
655 usize::MAX
656 } else {
657 config
658 .line_length
659 .get()
660 .saturating_sub(self.calculate_string_length(&explicit_prefix))
661 .max(1)
662 };
663
664 let reflow_options = crate::utils::text_reflow::ReflowOptions {
665 line_length: reflow_line_length,
666 break_on_sentences: true,
667 preserve_breaks: false,
668 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
669 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
670 abbreviations: config.abbreviations_for_reflow(),
671 length_mode: self.reflow_length_mode(),
672 };
673
674 let reflowed_with_style =
675 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
676
677 if reflowed_with_style.is_empty() {
678 return (None, next_idx);
679 }
680
681 let reflowed_text = reflowed_with_style.join("\n");
682
683 let start_range = line_index.whole_line_range(paragraph_start + 1);
684 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
685 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
686 } else {
687 line_index.whole_line_range(end_line + 1)
688 };
689 let byte_range = start_range.start..end_range.end;
690
691 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
692 format!("{reflowed_text}\n")
693 } else {
694 reflowed_text
695 };
696
697 let original_text = &ctx.content[byte_range.clone()];
698 if original_text == replacement {
699 return (None, next_idx);
700 }
701
702 let (warning_line, warning_end_line) = match config.reflow_mode {
703 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
704 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
705 ReflowMode::Default => {
706 let violating_line = collected
707 .iter()
708 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
709 .map(|line| line.line_idx + 1)
710 .unwrap_or(paragraph_start + 1);
711 (violating_line, violating_line)
712 }
713 };
714
715 let warning = LintWarning {
716 rule_name: Some(self.name().to_string()),
717 message: match config.reflow_mode {
718 ReflowMode::Normalize => format!(
719 "Paragraph could be normalized to use line length of {} characters",
720 config.line_length.get()
721 ),
722 ReflowMode::SentencePerLine => {
723 let num_sentences = split_into_sentences(¶graph_text).len();
724 if line_data.len() == 1 {
725 format!("Line contains {num_sentences} sentences (one sentence per line required)")
726 } else {
727 let num_lines = line_data.len();
728 format!(
729 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
730 )
731 }
732 }
733 ReflowMode::SemanticLineBreaks => {
734 let num_sentences = split_into_sentences(¶graph_text).len();
735 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
736 }
737 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
738 },
739 line: warning_line,
740 column: 1,
741 end_line: warning_end_line,
742 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
743 severity: Severity::Warning,
744 fix: Some(crate::rule::Fix {
745 range: byte_range,
746 replacement,
747 }),
748 };
749
750 (Some(warning), next_idx)
751 }
752
753 /// Generate paragraph-based fixes
754 fn generate_paragraph_fixes(
755 &self,
756 ctx: &crate::lint_context::LintContext,
757 config: &MD013Config,
758 lines: &[&str],
759 ) -> Vec<LintWarning> {
760 let mut warnings = Vec::new();
761 let line_index = LineIndex::new(ctx.content);
762
763 let mut i = 0;
764 while i < lines.len() {
765 let line_num = i + 1;
766
767 // Handle blockquote paragraphs with style-preserving reflow.
768 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
769 let (warning, next_idx) = self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i);
770 if let Some(warning) = warning {
771 warnings.push(warning);
772 }
773 i = next_idx;
774 continue;
775 }
776
777 // Skip special structures (but NOT MkDocs containers - those get special handling)
778 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
779 info.in_code_block
780 || info.in_front_matter
781 || info.in_html_block
782 || info.in_html_comment
783 || info.in_esm_block
784 || info.in_jsx_expression
785 || info.in_mdx_comment
786 || info.in_mkdocstrings
787 });
788
789 if should_skip_due_to_line_info
790 || lines[i].trim().starts_with('#')
791 || TableUtils::is_potential_table_row(lines[i])
792 || lines[i].trim().is_empty()
793 || is_horizontal_rule(lines[i].trim())
794 || is_template_directive_only(lines[i])
795 || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
796 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
797 {
798 i += 1;
799 continue;
800 }
801
802 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
803 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
804 // Skip admonition/tab marker lines — only reflow their indented content
805 let current_line = lines[i];
806 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
807 i += 1;
808 continue;
809 }
810
811 let container_start = i;
812
813 // Detect the actual indent level from the first content line
814 // (supports nested admonitions with 8+ spaces)
815 let first_line = lines[i];
816 let base_indent_len = first_line.len() - first_line.trim_start().len();
817 let base_indent: String = " ".repeat(base_indent_len);
818
819 // Collect consecutive MkDocs container paragraph lines
820 let mut container_lines: Vec<&str> = Vec::new();
821 while i < lines.len() {
822 let current_line_num = i + 1;
823 let line_info = ctx.line_info(current_line_num);
824
825 // Stop if we leave the MkDocs container
826 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
827 break;
828 }
829
830 let line = lines[i];
831
832 // Stop at paragraph boundaries within the container
833 if line.trim().is_empty() {
834 break;
835 }
836
837 // Skip list items, code blocks, headings within containers
838 if is_list_item(line.trim())
839 || line.trim().starts_with("```")
840 || line.trim().starts_with("~~~")
841 || line.trim().starts_with('#')
842 {
843 break;
844 }
845
846 container_lines.push(line);
847 i += 1;
848 }
849
850 if container_lines.is_empty() {
851 // Must advance i to avoid infinite loop when we encounter
852 // non-paragraph content (code block, list, heading, empty line)
853 // at the start of an MkDocs container
854 i += 1;
855 continue;
856 }
857
858 // Strip the base indent from each line and join for reflow
859 let stripped_lines: Vec<&str> = container_lines
860 .iter()
861 .map(|line| {
862 if line.starts_with(&base_indent) {
863 &line[base_indent_len..]
864 } else {
865 line.trim_start()
866 }
867 })
868 .collect();
869 let paragraph_text = stripped_lines.join(" ");
870
871 // Check if reflow is needed
872 let needs_reflow = match config.reflow_mode {
873 ReflowMode::Normalize => container_lines.len() > 1,
874 ReflowMode::SentencePerLine => {
875 let sentences = split_into_sentences(¶graph_text);
876 sentences.len() > 1 || container_lines.len() > 1
877 }
878 ReflowMode::SemanticLineBreaks => {
879 let sentences = split_into_sentences(¶graph_text);
880 sentences.len() > 1
881 || container_lines.len() > 1
882 || container_lines
883 .iter()
884 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
885 }
886 ReflowMode::Default => container_lines
887 .iter()
888 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
889 };
890
891 if !needs_reflow {
892 continue;
893 }
894
895 // Calculate byte range for this container paragraph
896 let start_range = line_index.whole_line_range(container_start + 1);
897 let end_line = container_start + container_lines.len() - 1;
898 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
899 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
900 } else {
901 line_index.whole_line_range(end_line + 1)
902 };
903 let byte_range = start_range.start..end_range.end;
904
905 // Reflow with adjusted line length (accounting for the 4-space indent)
906 let reflow_line_length = if config.line_length.is_unlimited() {
907 usize::MAX
908 } else {
909 config.line_length.get().saturating_sub(base_indent_len).max(1)
910 };
911 let reflow_options = crate::utils::text_reflow::ReflowOptions {
912 line_length: reflow_line_length,
913 break_on_sentences: true,
914 preserve_breaks: false,
915 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
916 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
917 abbreviations: config.abbreviations_for_reflow(),
918 length_mode: self.reflow_length_mode(),
919 };
920 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
921
922 // Re-add the 4-space indent to each reflowed line
923 let reflowed_with_indent: Vec<String> =
924 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
925 let reflowed_text = reflowed_with_indent.join("\n");
926
927 // Preserve trailing newline
928 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
929 format!("{reflowed_text}\n")
930 } else {
931 reflowed_text
932 };
933
934 // Only generate a warning if the replacement is different
935 let original_text = &ctx.content[byte_range.clone()];
936 if original_text != replacement {
937 warnings.push(LintWarning {
938 rule_name: Some(self.name().to_string()),
939 message: format!(
940 "Line length {} exceeds {} characters (in MkDocs container)",
941 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
942 config.line_length.get()
943 ),
944 line: container_start + 1,
945 column: 1,
946 end_line: end_line + 1,
947 end_column: lines[end_line].len() + 1,
948 severity: Severity::Warning,
949 fix: Some(crate::rule::Fix {
950 range: byte_range,
951 replacement,
952 }),
953 });
954 }
955 continue;
956 }
957
958 // Helper function to detect semantic line markers
959 let is_semantic_line = |content: &str| -> bool {
960 let trimmed = content.trim_start();
961 let semantic_markers = [
962 "NOTE:",
963 "WARNING:",
964 "IMPORTANT:",
965 "CAUTION:",
966 "TIP:",
967 "DANGER:",
968 "HINT:",
969 "INFO:",
970 ];
971 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
972 };
973
974 // Helper function to detect fence markers (opening or closing)
975 let is_fence_marker = |content: &str| -> bool {
976 let trimmed = content.trim_start();
977 trimmed.starts_with("```") || trimmed.starts_with("~~~")
978 };
979
980 // Check if this is a list item - handle it specially
981 let trimmed = lines[i].trim();
982 if is_list_item(trimmed) {
983 // Collect the entire list item including continuation lines
984 let list_start = i;
985 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
986 let marker_len = marker.len();
987
// Classification of every line collected for the current list item.
// The collection loop below pushes one value per source line; the values are
// later grouped into `Block`s so that only paragraph text is reflowed.
#[derive(Clone)]
enum LineType {
    // Paragraph text with the list indentation stripped.
    Content(String),
    // Code line: content with its indentation stripped, plus the original indent.
    // Also used for fence marker lines and for lines indented past marker_len + 3.
    CodeBlock(String, usize), // content and original indent
    // Nested list item line that is kept inside the parent item.
    NestedListItem(String, usize), // full line content and original indent
    SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
    SnippetLine(String), // MkDocs Snippets delimiters (-8<-) that must stay on their own line
    DivMarker(String), // Quarto/Pandoc div markers (::: opening or closing)
    // Blank line that is followed by a sufficiently indented, non-blank line.
    Empty,
}
999
1000 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1001 i += 1;
1002
1003 // Collect continuation lines using ctx.lines for metadata
1004 while i < lines.len() {
1005 let line_info = &ctx.lines[i];
1006
1007 // Use pre-computed is_blank from ctx
1008 if line_info.is_blank {
1009 // Empty line - check if next line is indented (part of list item)
1010 if i + 1 < lines.len() {
1011 let next_info = &ctx.lines[i + 1];
1012
1013 // Check if next line is indented enough to be continuation
1014 if !next_info.is_blank && next_info.indent >= marker_len {
1015 // This blank line is between paragraphs/blocks in the list item
1016 list_item_lines.push(LineType::Empty);
1017 i += 1;
1018 continue;
1019 }
1020 }
1021 // No indented line after blank, end of list item
1022 break;
1023 }
1024
1025 // Use pre-computed indent from ctx
1026 let indent = line_info.indent;
1027
1028 // Valid continuation must be indented at least marker_len
1029 if indent >= marker_len {
1030 let trimmed = line_info.content(ctx.content).trim();
1031
1032 // Use pre-computed in_code_block from ctx
1033 if line_info.in_code_block {
1034 list_item_lines.push(LineType::CodeBlock(
1035 line_info.content(ctx.content)[indent..].to_string(),
1036 indent,
1037 ));
1038 i += 1;
1039 continue;
1040 }
1041
1042 // Check if this is a SIBLING list item (breaks parent)
1043 // Nested lists are indented >= marker_len and are PART of the parent item
1044 // Siblings are at indent < marker_len (at or before parent marker)
1045 if is_list_item(trimmed) && indent < marker_len {
1046 // This is a sibling item at same or higher level - end parent item
1047 break;
1048 }
1049
1050 // Check if this is a NESTED list item marker
1051 // Nested lists should be processed separately UNLESS they're part of a
1052 // multi-paragraph list item (indicated by a blank line before them OR
1053 // it's a continuation of an already-started nested list)
1054 if is_list_item(trimmed) && indent >= marker_len {
1055 // Check if there was a blank line before this (multi-paragraph context)
1056 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
1057
1058 // Check if we've already seen nested list content (another nested item)
1059 let has_nested_content = list_item_lines.iter().any(|line| {
1060 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
1061 || matches!(line, LineType::NestedListItem(_, _))
1062 });
1063
1064 if !has_blank_before && !has_nested_content {
1065 // Single-paragraph context with no prior nested items: starts a new item
1066 // End parent collection; nested list will be processed next
1067 break;
1068 }
1069 // else: multi-paragraph context or continuation of nested list, keep collecting
1070 // Mark this as a nested list item to preserve its structure
1071 list_item_lines.push(LineType::NestedListItem(
1072 line_info.content(ctx.content)[indent..].to_string(),
1073 indent,
1074 ));
1075 i += 1;
1076 continue;
1077 }
1078
1079 // Normal continuation: marker_len to marker_len+3
1080 if indent <= marker_len + 3 {
1081 // Extract content (remove indentation and trailing whitespace)
1082 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1083 // See: https://github.com/rvben/rumdl/issues/76
1084 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1085
1086 // Check if this is a div marker (::: opening or closing)
1087 // These must be preserved on their own line, not merged into paragraphs
1088 if line_info.is_div_marker {
1089 list_item_lines.push(LineType::DivMarker(content));
1090 }
1091 // Check if this is a fence marker (opening or closing)
1092 // These should be treated as code block lines, not paragraph content
1093 else if is_fence_marker(&content) {
1094 list_item_lines.push(LineType::CodeBlock(content, indent));
1095 }
1096 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1097 else if is_semantic_line(&content) {
1098 list_item_lines.push(LineType::SemanticLine(content));
1099 }
1100 // Check if this is a snippet block delimiter (-8<- or --8<--)
1101 // These must be preserved on their own lines for MkDocs Snippets extension
1102 else if is_snippet_block_delimiter(&content) {
1103 list_item_lines.push(LineType::SnippetLine(content));
1104 } else {
1105 list_item_lines.push(LineType::Content(content));
1106 }
1107 i += 1;
1108 } else {
1109 // indent >= marker_len + 4: indented code block
1110 list_item_lines.push(LineType::CodeBlock(
1111 line_info.content(ctx.content)[indent..].to_string(),
1112 indent,
1113 ));
1114 i += 1;
1115 }
1116 } else {
1117 // Not indented enough, end of list item
1118 break;
1119 }
1120 }
1121
1122 let indent_size = marker_len;
1123 let expected_indent = " ".repeat(indent_size);
1124
// Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks).
// Each block is rendered separately when the list item is rebuilt: paragraph
// blocks are reflowed, all other blocks are emitted with their content preserved.
#[derive(Clone)]
enum Block {
    // Consecutive content lines forming one paragraph of the list item.
    Paragraph(Vec<String>),
    // Run of code lines; blank lines inside the run are stored as ("", 0).
    Code {
        lines: Vec<(String, usize)>, // (content, indent) pairs
        has_preceding_blank: bool, // Whether there was a blank line before this block
    },
    // Run of nested list lines; blank lines inside the run are stored as ("", 0).
    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
    SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
    DivMarker(String), // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
    // Lines from a block-level HTML opening tag (or comment) up to its matching close.
    Html {
        lines: Vec<String>, // HTML content preserved exactly as-is
        has_preceding_blank: bool, // Whether there was a blank line before this block
    },
}
1142
1143 // HTML tag detection helpers
// Lower-case names of the HTML elements whose opening tag starts an HTML block
// inside a list item. Grouped by category; order matches the original list.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    // Sectioning and generic containers
    "div", "details", "summary", "section", "article", "header", "footer", "nav", "aside", "main",
    // Tables
    "table", "thead", "tbody", "tfoot", "tr", "td", "th",
    // Lists
    "ul", "ol", "li", "dl", "dt", "dd",
    // Preformatted text, quotations and figures
    "pre", "blockquote", "figure", "figcaption",
    // Forms
    "form", "fieldset", "legend",
    // Rules, paragraphs and headings
    "hr", "p", "h1", "h2", "h3", "h4", "h5", "h6",
    // Style and scripting
    "style", "script", "noscript",
];
1188
1189 fn is_block_html_opening_tag(line: &str) -> Option<String> {
1190 let trimmed = line.trim();
1191
1192 // Check for HTML comments
1193 if trimmed.starts_with("<!--") {
1194 return Some("!--".to_string());
1195 }
1196
1197 // Check for opening tags
1198 if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
1199 // Extract tag name from <tagname ...> or <tagname>
1200 let after_bracket = &trimmed[1..];
1201 if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
1202 let tag_name = after_bracket[..end].to_lowercase();
1203
1204 // Only treat as block if it's a known block-level tag
1205 if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
1206 return Some(tag_name);
1207 }
1208 }
1209 }
1210 None
1211 }
1212
/// Returns `true` when `line` closes the HTML construct identified by `tag_name`.
///
/// * For the pseudo tag `"!--"` (an HTML comment) the line must end with `-->`.
/// * Otherwise the trimmed line must start with `</`, optionally followed by
///   whitespace, then the tag name, then a name boundary (`>`, whitespace, or
///   end of line).
///
/// The name comparison ignores ASCII case because the opening-tag helper
/// lower-cases tag names, so `<DIV>` must be closable by `</DIV>`. The boundary
/// check stops `</pre>` from closing an open `p`, or `</thead>` an open `th`.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let rest = match trimmed.strip_prefix("</") {
        Some(rest) => rest.trim_start(),
        None => return false,
    };

    // `get` returns None when the line is too short or the cut would fall
    // inside a multi-byte character; neither can be a match.
    let name_matches = rest
        .get(..tag_name.len())
        .is_some_and(|candidate| candidate.eq_ignore_ascii_case(tag_name));
    if !name_matches {
        return false;
    }

    // Require the name to end here, so a longer tag sharing the prefix is rejected.
    match rest[tag_name.len()..].chars().next() {
        None => true,
        Some(next) => next == '>' || next.is_whitespace(),
    }
}
1226
/// Returns `true` when the line, ignoring surrounding whitespace, ends with `/>`.
fn is_self_closing_tag(line: &str) -> bool {
    // Leading whitespace cannot affect a suffix test, so trimming the end suffices.
    line.trim_end().strip_suffix("/>").is_some()
}
1231
1232 let mut blocks: Vec<Block> = Vec::new();
1233 let mut current_paragraph: Vec<String> = Vec::new();
1234 let mut current_code_block: Vec<(String, usize)> = Vec::new();
1235 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
1236 let mut current_html_block: Vec<String> = Vec::new();
1237 let mut html_tag_stack: Vec<String> = Vec::new();
1238 let mut in_code = false;
1239 let mut in_nested_list = false;
1240 let mut in_html_block = false;
1241 let mut had_preceding_blank = false; // Track if we just saw an empty line
1242 let mut code_block_has_preceding_blank = false; // Track blank before current code block
1243 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
1244
1245 for line in &list_item_lines {
1246 match line {
1247 LineType::Empty => {
1248 if in_code {
1249 current_code_block.push((String::new(), 0));
1250 } else if in_nested_list {
1251 current_nested_list.push((String::new(), 0));
1252 } else if in_html_block {
1253 // Allow blank lines inside HTML blocks
1254 current_html_block.push(String::new());
1255 } else if !current_paragraph.is_empty() {
1256 blocks.push(Block::Paragraph(current_paragraph.clone()));
1257 current_paragraph.clear();
1258 }
1259 // Mark that we saw a blank line
1260 had_preceding_blank = true;
1261 }
1262 LineType::Content(content) => {
1263 // Check if we're currently in an HTML block
1264 if in_html_block {
1265 current_html_block.push(content.clone());
1266
1267 // Check if this line closes any open HTML tags
1268 if let Some(last_tag) = html_tag_stack.last() {
1269 if is_html_closing_tag(content, last_tag) {
1270 html_tag_stack.pop();
1271
1272 // If stack is empty, HTML block is complete
1273 if html_tag_stack.is_empty() {
1274 blocks.push(Block::Html {
1275 lines: current_html_block.clone(),
1276 has_preceding_blank: html_block_has_preceding_blank,
1277 });
1278 current_html_block.clear();
1279 in_html_block = false;
1280 }
1281 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1282 // Nested opening tag within HTML block
1283 if !is_self_closing_tag(content) {
1284 html_tag_stack.push(new_tag);
1285 }
1286 }
1287 }
1288 had_preceding_blank = false;
1289 } else {
1290 // Not in HTML block - check if this line starts one
1291 if let Some(tag_name) = is_block_html_opening_tag(content) {
1292 // Flush current paragraph before starting HTML block
1293 if in_code {
1294 blocks.push(Block::Code {
1295 lines: current_code_block.clone(),
1296 has_preceding_blank: code_block_has_preceding_blank,
1297 });
1298 current_code_block.clear();
1299 in_code = false;
1300 } else if in_nested_list {
1301 blocks.push(Block::NestedList(current_nested_list.clone()));
1302 current_nested_list.clear();
1303 in_nested_list = false;
1304 } else if !current_paragraph.is_empty() {
1305 blocks.push(Block::Paragraph(current_paragraph.clone()));
1306 current_paragraph.clear();
1307 }
1308
1309 // Start new HTML block
1310 in_html_block = true;
1311 html_block_has_preceding_blank = had_preceding_blank;
1312 current_html_block.push(content.clone());
1313
1314 // Check if it's self-closing or needs a closing tag
1315 if is_self_closing_tag(content) {
1316 // Self-closing tag - complete the HTML block immediately
1317 blocks.push(Block::Html {
1318 lines: current_html_block.clone(),
1319 has_preceding_blank: html_block_has_preceding_blank,
1320 });
1321 current_html_block.clear();
1322 in_html_block = false;
1323 } else {
1324 // Regular opening tag - push to stack
1325 html_tag_stack.push(tag_name);
1326 }
1327 } else {
1328 // Regular content line - add to paragraph
1329 if in_code {
1330 // Switching from code to content
1331 blocks.push(Block::Code {
1332 lines: current_code_block.clone(),
1333 has_preceding_blank: code_block_has_preceding_blank,
1334 });
1335 current_code_block.clear();
1336 in_code = false;
1337 } else if in_nested_list {
1338 // Switching from nested list to content
1339 blocks.push(Block::NestedList(current_nested_list.clone()));
1340 current_nested_list.clear();
1341 in_nested_list = false;
1342 }
1343 current_paragraph.push(content.clone());
1344 }
1345 had_preceding_blank = false; // Reset after content
1346 }
1347 }
1348 LineType::CodeBlock(content, indent) => {
1349 if in_nested_list {
1350 // Switching from nested list to code
1351 blocks.push(Block::NestedList(current_nested_list.clone()));
1352 current_nested_list.clear();
1353 in_nested_list = false;
1354 } else if in_html_block {
1355 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1356 blocks.push(Block::Html {
1357 lines: current_html_block.clone(),
1358 has_preceding_blank: html_block_has_preceding_blank,
1359 });
1360 current_html_block.clear();
1361 html_tag_stack.clear();
1362 in_html_block = false;
1363 }
1364 if !in_code {
1365 // Switching from content to code
1366 if !current_paragraph.is_empty() {
1367 blocks.push(Block::Paragraph(current_paragraph.clone()));
1368 current_paragraph.clear();
1369 }
1370 in_code = true;
1371 // Record whether there was a blank line before this code block
1372 code_block_has_preceding_blank = had_preceding_blank;
1373 }
1374 current_code_block.push((content.clone(), *indent));
1375 had_preceding_blank = false; // Reset after code
1376 }
1377 LineType::NestedListItem(content, indent) => {
1378 if in_code {
1379 // Switching from code to nested list
1380 blocks.push(Block::Code {
1381 lines: current_code_block.clone(),
1382 has_preceding_blank: code_block_has_preceding_blank,
1383 });
1384 current_code_block.clear();
1385 in_code = false;
1386 } else if in_html_block {
1387 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1388 blocks.push(Block::Html {
1389 lines: current_html_block.clone(),
1390 has_preceding_blank: html_block_has_preceding_blank,
1391 });
1392 current_html_block.clear();
1393 html_tag_stack.clear();
1394 in_html_block = false;
1395 }
1396 if !in_nested_list {
1397 // Switching from content to nested list
1398 if !current_paragraph.is_empty() {
1399 blocks.push(Block::Paragraph(current_paragraph.clone()));
1400 current_paragraph.clear();
1401 }
1402 in_nested_list = true;
1403 }
1404 current_nested_list.push((content.clone(), *indent));
1405 had_preceding_blank = false; // Reset after nested list
1406 }
1407 LineType::SemanticLine(content) => {
1408 // Semantic lines are standalone - flush any current block and add as separate block
1409 if in_code {
1410 blocks.push(Block::Code {
1411 lines: current_code_block.clone(),
1412 has_preceding_blank: code_block_has_preceding_blank,
1413 });
1414 current_code_block.clear();
1415 in_code = false;
1416 } else if in_nested_list {
1417 blocks.push(Block::NestedList(current_nested_list.clone()));
1418 current_nested_list.clear();
1419 in_nested_list = false;
1420 } else if in_html_block {
1421 blocks.push(Block::Html {
1422 lines: current_html_block.clone(),
1423 has_preceding_blank: html_block_has_preceding_blank,
1424 });
1425 current_html_block.clear();
1426 html_tag_stack.clear();
1427 in_html_block = false;
1428 } else if !current_paragraph.is_empty() {
1429 blocks.push(Block::Paragraph(current_paragraph.clone()));
1430 current_paragraph.clear();
1431 }
1432 // Add semantic line as its own block
1433 blocks.push(Block::SemanticLine(content.clone()));
1434 had_preceding_blank = false; // Reset after semantic line
1435 }
1436 LineType::SnippetLine(content) => {
1437 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1438 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1439 if in_code {
1440 blocks.push(Block::Code {
1441 lines: current_code_block.clone(),
1442 has_preceding_blank: code_block_has_preceding_blank,
1443 });
1444 current_code_block.clear();
1445 in_code = false;
1446 } else if in_nested_list {
1447 blocks.push(Block::NestedList(current_nested_list.clone()));
1448 current_nested_list.clear();
1449 in_nested_list = false;
1450 } else if in_html_block {
1451 blocks.push(Block::Html {
1452 lines: current_html_block.clone(),
1453 has_preceding_blank: html_block_has_preceding_blank,
1454 });
1455 current_html_block.clear();
1456 html_tag_stack.clear();
1457 in_html_block = false;
1458 } else if !current_paragraph.is_empty() {
1459 blocks.push(Block::Paragraph(current_paragraph.clone()));
1460 current_paragraph.clear();
1461 }
1462 // Add snippet line as its own block
1463 blocks.push(Block::SnippetLine(content.clone()));
1464 had_preceding_blank = false;
1465 }
1466 LineType::DivMarker(content) => {
1467 // Div markers (::: opening or closing) are standalone structural delimiters
1468 // Flush any current block and add as separate block
1469 if in_code {
1470 blocks.push(Block::Code {
1471 lines: current_code_block.clone(),
1472 has_preceding_blank: code_block_has_preceding_blank,
1473 });
1474 current_code_block.clear();
1475 in_code = false;
1476 } else if in_nested_list {
1477 blocks.push(Block::NestedList(current_nested_list.clone()));
1478 current_nested_list.clear();
1479 in_nested_list = false;
1480 } else if in_html_block {
1481 blocks.push(Block::Html {
1482 lines: current_html_block.clone(),
1483 has_preceding_blank: html_block_has_preceding_blank,
1484 });
1485 current_html_block.clear();
1486 html_tag_stack.clear();
1487 in_html_block = false;
1488 } else if !current_paragraph.is_empty() {
1489 blocks.push(Block::Paragraph(current_paragraph.clone()));
1490 current_paragraph.clear();
1491 }
1492 blocks.push(Block::DivMarker(content.clone()));
1493 had_preceding_blank = false;
1494 }
1495 }
1496 }
1497
1498 // Push remaining block
1499 if in_code && !current_code_block.is_empty() {
1500 blocks.push(Block::Code {
1501 lines: current_code_block,
1502 has_preceding_blank: code_block_has_preceding_blank,
1503 });
1504 } else if in_nested_list && !current_nested_list.is_empty() {
1505 blocks.push(Block::NestedList(current_nested_list));
1506 } else if in_html_block && !current_html_block.is_empty() {
1507 // If we still have an unclosed HTML block, push it anyway
1508 // (malformed HTML - missing closing tag)
1509 blocks.push(Block::Html {
1510 lines: current_html_block,
1511 has_preceding_blank: html_block_has_preceding_blank,
1512 });
1513 } else if !current_paragraph.is_empty() {
1514 blocks.push(Block::Paragraph(current_paragraph));
1515 }
1516
1517 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1518 let content_lines: Vec<String> = list_item_lines
1519 .iter()
1520 .filter_map(|line| {
1521 if let LineType::Content(s) = line {
1522 Some(s.clone())
1523 } else {
1524 None
1525 }
1526 })
1527 .collect();
1528
1529 // Check if we need to reflow this list item
1530 // We check the combined content to see if it exceeds length limits
1531 let combined_content = content_lines.join(" ").trim().to_string();
1532 let full_line = format!("{marker}{combined_content}");
1533
1534 // Helper to check if we should reflow in normalize mode
1535 let should_normalize = || {
1536 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1537 // DO normalize if it has plain text content that spans multiple lines
1538 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1539 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1540 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1541 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1542 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1543 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1544
1545 // If we have structural blocks but no paragraphs, don't normalize
1546 if (has_nested_lists
1547 || has_code_blocks
1548 || has_semantic_lines
1549 || has_snippet_lines
1550 || has_div_markers)
1551 && !has_paragraphs
1552 {
1553 return false;
1554 }
1555
1556 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1557 if has_paragraphs {
1558 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1559 if paragraph_count > 1 {
1560 // Multiple paragraph blocks should be normalized
1561 return true;
1562 }
1563
1564 // Single paragraph block: normalize if it has multiple content lines
1565 if content_lines.len() > 1 {
1566 return true;
1567 }
1568 }
1569
1570 false
1571 };
1572
1573 let needs_reflow = match config.reflow_mode {
1574 ReflowMode::Normalize => {
1575 // Only reflow if:
1576 // 1. The combined line would exceed the limit, OR
1577 // 2. The list item should be normalized (has multi-line plain text)
1578 let combined_length = self.calculate_effective_length(&full_line);
1579 if combined_length > config.line_length.get() {
1580 true
1581 } else {
1582 should_normalize()
1583 }
1584 }
1585 ReflowMode::SentencePerLine => {
1586 // Check if list item has multiple sentences
1587 let sentences = split_into_sentences(&combined_content);
1588 sentences.len() > 1
1589 }
1590 ReflowMode::SemanticLineBreaks => {
1591 let sentences = split_into_sentences(&combined_content);
1592 sentences.len() > 1
1593 || (list_start..i).any(|line_idx| {
1594 self.calculate_effective_length(lines[line_idx]) > config.line_length.get()
1595 })
1596 }
1597 ReflowMode::Default => {
1598 // In default mode, only reflow if any individual line exceeds limit
1599 (list_start..i)
1600 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1601 }
1602 };
1603
1604 if needs_reflow {
1605 let start_range = line_index.whole_line_range(list_start + 1);
1606 let end_line = i - 1;
1607 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1608 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1609 } else {
1610 line_index.whole_line_range(end_line + 1)
1611 };
1612 let byte_range = start_range.start..end_range.end;
1613
1614 // Reflow each block (paragraphs only, preserve code blocks)
1615 // When line_length = 0 (no limit), use a very large value for reflow
1616 let reflow_line_length = if config.line_length.is_unlimited() {
1617 usize::MAX
1618 } else {
1619 config.line_length.get().saturating_sub(indent_size).max(1)
1620 };
1621 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1622 line_length: reflow_line_length,
1623 break_on_sentences: true,
1624 preserve_breaks: false,
1625 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1626 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1627 abbreviations: config.abbreviations_for_reflow(),
1628 length_mode: self.reflow_length_mode(),
1629 };
1630
1631 let mut result: Vec<String> = Vec::new();
1632 let mut is_first_block = true;
1633
1634 for (block_idx, block) in blocks.iter().enumerate() {
1635 match block {
1636 Block::Paragraph(para_lines) => {
1637 // Split the paragraph into segments at hard break boundaries
1638 // Each segment can be reflowed independently
1639 let segments = split_into_segments(para_lines);
1640
1641 for (segment_idx, segment) in segments.iter().enumerate() {
1642 // Check if this segment ends with a hard break and what type
1643 let hard_break_type = segment.last().and_then(|line| {
1644 let line = line.strip_suffix('\r').unwrap_or(line);
1645 if line.ends_with('\\') {
1646 Some("\\")
1647 } else if line.ends_with(" ") {
1648 Some(" ")
1649 } else {
1650 None
1651 }
1652 });
1653
1654 // Join and reflow the segment (removing the hard break marker for processing)
1655 let segment_for_reflow: Vec<String> = segment
1656 .iter()
1657 .map(|line| {
1658 // Strip hard break marker (2 spaces or backslash) for reflow processing
1659 if line.ends_with('\\') {
1660 line[..line.len() - 1].trim_end().to_string()
1661 } else if line.ends_with(" ") {
1662 line[..line.len() - 2].trim_end().to_string()
1663 } else {
1664 line.clone()
1665 }
1666 })
1667 .collect();
1668
1669 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1670 if !segment_text.is_empty() {
1671 let reflowed =
1672 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1673
1674 if is_first_block && segment_idx == 0 {
1675 // First segment of first block starts with marker
1676 result.push(format!("{marker}{}", reflowed[0]));
1677 for line in reflowed.iter().skip(1) {
1678 result.push(format!("{expected_indent}{line}"));
1679 }
1680 is_first_block = false;
1681 } else {
1682 // Subsequent segments
1683 for line in reflowed {
1684 result.push(format!("{expected_indent}{line}"));
1685 }
1686 }
1687
1688 // If this segment had a hard break, add it back to the last line
1689 // Preserve the original hard break format (backslash or two spaces)
1690 if let Some(break_marker) = hard_break_type
1691 && let Some(last_line) = result.last_mut()
1692 {
1693 last_line.push_str(break_marker);
1694 }
1695 }
1696 }
1697
1698 // Add blank line after paragraph block if there's a next block.
1699 // Check if next block is a code block that doesn't want a preceding blank.
1700 // Also don't add blank lines before snippet lines (they should stay tight).
1701 // Only add if not already ending with one (avoids double blanks).
1702 if block_idx < blocks.len() - 1 {
1703 let next_block = &blocks[block_idx + 1];
1704 let should_add_blank = match next_block {
1705 Block::Code {
1706 has_preceding_blank, ..
1707 } => *has_preceding_blank,
1708 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1709 _ => true, // For all other blocks, add blank line
1710 };
1711 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1712 {
1713 result.push(String::new());
1714 }
1715 }
1716 }
1717 Block::Code {
1718 lines: code_lines,
1719 has_preceding_blank: _,
1720 } => {
1721 // Preserve code blocks as-is with original indentation
1722 // NOTE: Blank line before code block is handled by the previous block
1723 // (see paragraph block's logic above)
1724
1725 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1726 if is_first_block && idx == 0 {
1727 // First line of first block gets marker
1728 result.push(format!(
1729 "{marker}{}",
1730 " ".repeat(orig_indent - marker_len) + content
1731 ));
1732 is_first_block = false;
1733 } else if content.is_empty() {
1734 result.push(String::new());
1735 } else {
1736 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1737 }
1738 }
1739 }
1740 Block::NestedList(nested_items) => {
1741 // Preserve nested list items as-is with original indentation.
1742 // Only add blank before if not already ending with one (avoids
1743 // double blanks when the preceding block already added one).
1744 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1745 result.push(String::new());
1746 }
1747
1748 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1749 if is_first_block && idx == 0 {
1750 // First line of first block gets marker
1751 result.push(format!(
1752 "{marker}{}",
1753 " ".repeat(orig_indent - marker_len) + content
1754 ));
1755 is_first_block = false;
1756 } else if content.is_empty() {
1757 result.push(String::new());
1758 } else {
1759 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1760 }
1761 }
1762
1763 // Add blank line after nested list if there's a next block.
1764 // Only add if not already ending with one (avoids double blanks
1765 // when the last nested item was already a blank line).
1766 if block_idx < blocks.len() - 1 {
1767 let next_block = &blocks[block_idx + 1];
1768 let should_add_blank = match next_block {
1769 Block::Code {
1770 has_preceding_blank, ..
1771 } => *has_preceding_blank,
1772 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1773 _ => true, // For all other blocks, add blank line
1774 };
1775 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1776 {
1777 result.push(String::new());
1778 }
1779 }
1780 }
1781 Block::SemanticLine(content) => {
1782 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
1783 // Only add blank before if not already ending with one.
1784 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1785 result.push(String::new());
1786 }
1787
1788 if is_first_block {
1789 // First block starts with marker
1790 result.push(format!("{marker}{content}"));
1791 is_first_block = false;
1792 } else {
1793 // Subsequent blocks use expected indent
1794 result.push(format!("{expected_indent}{content}"));
1795 }
1796
1797 // Add blank line after semantic line if there's a next block.
1798 // Only add if not already ending with one.
1799 if block_idx < blocks.len() - 1 {
1800 let next_block = &blocks[block_idx + 1];
1801 let should_add_blank = match next_block {
1802 Block::Code {
1803 has_preceding_blank, ..
1804 } => *has_preceding_blank,
1805 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1806 _ => true, // For all other blocks, add blank line
1807 };
1808 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1809 {
1810 result.push(String::new());
1811 }
1812 }
1813 }
1814 Block::SnippetLine(content) => {
1815 // Preserve snippet delimiters (-8<-) as-is on their own line
1816 // Unlike semantic lines, snippet lines don't add extra blank lines
1817 if is_first_block {
1818 // First block starts with marker
1819 result.push(format!("{marker}{content}"));
1820 is_first_block = false;
1821 } else {
1822 // Subsequent blocks use expected indent
1823 result.push(format!("{expected_indent}{content}"));
1824 }
1825 // No blank lines added before or after snippet delimiters
1826 }
1827 Block::DivMarker(content) => {
1828 // Preserve div markers (::: opening or closing) as-is on their own line
1829 if is_first_block {
1830 result.push(format!("{marker}{content}"));
1831 is_first_block = false;
1832 } else {
1833 result.push(format!("{expected_indent}{content}"));
1834 }
1835 }
1836 Block::Html {
1837 lines: html_lines,
1838 has_preceding_blank: _,
1839 } => {
1840 // Preserve HTML blocks exactly as-is with original indentation
1841 // NOTE: Blank line before HTML block is handled by the previous block
1842
1843 for (idx, line) in html_lines.iter().enumerate() {
1844 if is_first_block && idx == 0 {
1845 // First line of first block gets marker
1846 result.push(format!("{marker}{line}"));
1847 is_first_block = false;
1848 } else if line.is_empty() {
1849 // Preserve blank lines inside HTML blocks
1850 result.push(String::new());
1851 } else {
1852 // Preserve lines with their original content (already includes indentation)
1853 result.push(format!("{expected_indent}{line}"));
1854 }
1855 }
1856
1857 // Add blank line after HTML block if there's a next block.
1858 // Only add if not already ending with one (avoids double blanks
1859 // when the HTML block itself contained a trailing blank line).
1860 if block_idx < blocks.len() - 1 {
1861 let next_block = &blocks[block_idx + 1];
1862 let should_add_blank = match next_block {
1863 Block::Code {
1864 has_preceding_blank, ..
1865 } => *has_preceding_blank,
1866 Block::Html {
1867 has_preceding_blank, ..
1868 } => *has_preceding_blank,
1869 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1870 _ => true, // For all other blocks, add blank line
1871 };
1872 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1873 {
1874 result.push(String::new());
1875 }
1876 }
1877 }
1878 }
1879 }
1880
1881 let reflowed_text = result.join("\n");
1882
1883 // Preserve trailing newline
1884 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1885 format!("{reflowed_text}\n")
1886 } else {
1887 reflowed_text
1888 };
1889
1890 // Get the original text to compare
1891 let original_text = &ctx.content[byte_range.clone()];
1892
1893 // Only generate a warning if the replacement is different from the original
1894 if original_text != replacement {
1895 // Generate an appropriate message based on why reflow is needed
1896 let message = match config.reflow_mode {
1897 ReflowMode::SentencePerLine => {
1898 let num_sentences = split_into_sentences(&combined_content).len();
1899 let num_lines = content_lines.len();
1900 if num_lines == 1 {
1901 // Single line with multiple sentences
1902 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1903 } else {
1904 // Multiple lines - could be split sentences or mixed
1905 format!(
1906 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1907 )
1908 }
1909 }
1910 ReflowMode::SemanticLineBreaks => {
1911 let num_sentences = split_into_sentences(&combined_content).len();
1912 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
1913 }
1914 ReflowMode::Normalize => {
1915 let combined_length = self.calculate_effective_length(&full_line);
1916 if combined_length > config.line_length.get() {
1917 format!(
1918 "Line length {} exceeds {} characters",
1919 combined_length,
1920 config.line_length.get()
1921 )
1922 } else {
1923 "Multi-line content can be normalized".to_string()
1924 }
1925 }
1926 ReflowMode::Default => {
1927 let combined_length = self.calculate_effective_length(&full_line);
1928 format!(
1929 "Line length {} exceeds {} characters",
1930 combined_length,
1931 config.line_length.get()
1932 )
1933 }
1934 };
1935
1936 warnings.push(LintWarning {
1937 rule_name: Some(self.name().to_string()),
1938 message,
1939 line: list_start + 1,
1940 column: 1,
1941 end_line: end_line + 1,
1942 end_column: lines[end_line].len() + 1,
1943 severity: Severity::Warning,
1944 fix: Some(crate::rule::Fix {
1945 range: byte_range,
1946 replacement,
1947 }),
1948 });
1949 }
1950 }
1951 continue;
1952 }
1953
1954 // Found start of a paragraph - collect all lines in it
1955 let paragraph_start = i;
1956 let mut paragraph_lines = vec![lines[i]];
1957 i += 1;
1958
1959 while i < lines.len() {
1960 let next_line = lines[i];
1961 let next_line_num = i + 1;
1962 let next_trimmed = next_line.trim();
1963
1964 // Stop at paragraph boundaries
1965 if next_trimmed.is_empty()
1966 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1967 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1968 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1969 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1970 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1971 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1972 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1973 || ctx
1974 .line_info(next_line_num)
1975 .is_some_and(|info| info.in_mkdocs_container())
1976 || (next_line_num > 0
1977 && next_line_num <= ctx.lines.len()
1978 && ctx.lines[next_line_num - 1].blockquote.is_some())
1979 || next_trimmed.starts_with('#')
1980 || TableUtils::is_potential_table_row(next_line)
1981 || is_list_item(next_trimmed)
1982 || is_horizontal_rule(next_trimmed)
1983 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1984 || is_template_directive_only(next_line)
1985 || is_standalone_attr_list(next_line)
1986 || is_snippet_block_delimiter(next_line)
1987 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
1988 {
1989 break;
1990 }
1991
1992 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1993 if i > 0 && has_hard_break(lines[i - 1]) {
1994 // Don't include lines after hard breaks in the same paragraph
1995 break;
1996 }
1997
1998 paragraph_lines.push(next_line);
1999 i += 1;
2000 }
2001
2002 // Combine paragraph lines into a single string for processing
2003 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
2004 let paragraph_text = paragraph_lines.join(" ");
2005
2006 // Skip reflowing if this paragraph contains definition list items
2007 // Definition lists are multi-line structures that should not be joined
2008 let contains_definition_list = paragraph_lines
2009 .iter()
2010 .any(|line| crate::utils::is_definition_list_item(line));
2011
2012 if contains_definition_list {
2013 // Don't reflow definition lists - skip this paragraph
2014 i = paragraph_start + paragraph_lines.len();
2015 continue;
2016 }
2017
2018 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2019 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2020 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2021
2022 if contains_snippets {
2023 // Don't reflow Snippets blocks - skip this paragraph
2024 i = paragraph_start + paragraph_lines.len();
2025 continue;
2026 }
2027
2028 // Check if this paragraph needs reflowing
2029 let needs_reflow = match config.reflow_mode {
2030 ReflowMode::Normalize => {
2031 // In normalize mode, reflow multi-line paragraphs
2032 paragraph_lines.len() > 1
2033 }
2034 ReflowMode::SentencePerLine => {
2035 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
2036 // Note: we check the joined text because sentences can span multiple lines
2037 let sentences = split_into_sentences(¶graph_text);
2038
2039 // Always reflow if multiple sentences on one line
2040 if sentences.len() > 1 {
2041 true
2042 } else if paragraph_lines.len() > 1 {
2043 // For single-sentence paragraphs spanning multiple lines:
2044 // Reflow if they COULD fit on one line (respecting line-length constraint)
2045 if config.line_length.is_unlimited() {
2046 // No line-length constraint - always join single sentences
2047 true
2048 } else {
2049 // Only join if it fits within line-length
2050 let effective_length = self.calculate_effective_length(¶graph_text);
2051 effective_length <= config.line_length.get()
2052 }
2053 } else {
2054 false
2055 }
2056 }
2057 ReflowMode::SemanticLineBreaks => {
2058 let sentences = split_into_sentences(¶graph_text);
2059 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
2060 sentences.len() > 1
2061 || paragraph_lines.len() > 1
2062 || paragraph_lines
2063 .iter()
2064 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2065 }
2066 ReflowMode::Default => {
2067 // In default mode, only reflow if lines exceed limit
2068 paragraph_lines
2069 .iter()
2070 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2071 }
2072 };
2073
2074 if needs_reflow {
2075 // Calculate byte range for this paragraph
2076 // Use whole_line_range for each line and combine
2077 let start_range = line_index.whole_line_range(paragraph_start + 1);
2078 let end_line = paragraph_start + paragraph_lines.len() - 1;
2079
2080 // For the last line, we want to preserve any trailing newline
2081 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2082 // Last line without trailing newline - use line_text_range
2083 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2084 } else {
2085 // Not the last line or has trailing newline - use whole_line_range
2086 line_index.whole_line_range(end_line + 1)
2087 };
2088
2089 let byte_range = start_range.start..end_range.end;
2090
2091 // Check if the paragraph ends with a hard break and what type
2092 let hard_break_type = paragraph_lines.last().and_then(|line| {
2093 let line = line.strip_suffix('\r').unwrap_or(line);
2094 if line.ends_with('\\') {
2095 Some("\\")
2096 } else if line.ends_with(" ") {
2097 Some(" ")
2098 } else {
2099 None
2100 }
2101 });
2102
2103 // Reflow the paragraph
2104 // When line_length = 0 (no limit), use a very large value for reflow
2105 let reflow_line_length = if config.line_length.is_unlimited() {
2106 usize::MAX
2107 } else {
2108 config.line_length.get()
2109 };
2110 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2111 line_length: reflow_line_length,
2112 break_on_sentences: true,
2113 preserve_breaks: false,
2114 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2115 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2116 abbreviations: config.abbreviations_for_reflow(),
2117 length_mode: self.reflow_length_mode(),
2118 };
2119 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
2120
2121 // If the original paragraph ended with a hard break, preserve it
2122 // Preserve the original hard break format (backslash or two spaces)
2123 if let Some(break_marker) = hard_break_type
2124 && !reflowed.is_empty()
2125 {
2126 let last_idx = reflowed.len() - 1;
2127 if !has_hard_break(&reflowed[last_idx]) {
2128 reflowed[last_idx].push_str(break_marker);
2129 }
2130 }
2131
2132 let reflowed_text = reflowed.join("\n");
2133
2134 // Preserve trailing newline if the original paragraph had one
2135 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2136 format!("{reflowed_text}\n")
2137 } else {
2138 reflowed_text
2139 };
2140
2141 // Get the original text to compare
2142 let original_text = &ctx.content[byte_range.clone()];
2143
2144 // Only generate a warning if the replacement is different from the original
2145 if original_text != replacement {
2146 // Create warning with actual fix
2147 // In default mode, report the specific line that violates
2148 // In normalize mode, report the whole paragraph
2149 // In sentence-per-line mode, report the entire paragraph
2150 let (warning_line, warning_end_line) = match config.reflow_mode {
2151 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
2152 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
2153 // Highlight the entire paragraph that needs reformatting
2154 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
2155 }
2156 ReflowMode::Default => {
2157 // Find the first line that exceeds the limit
2158 let mut violating_line = paragraph_start;
2159 for (idx, line) in paragraph_lines.iter().enumerate() {
2160 if self.calculate_effective_length(line) > config.line_length.get() {
2161 violating_line = paragraph_start + idx;
2162 break;
2163 }
2164 }
2165 (violating_line + 1, violating_line + 1)
2166 }
2167 };
2168
2169 warnings.push(LintWarning {
2170 rule_name: Some(self.name().to_string()),
2171 message: match config.reflow_mode {
2172 ReflowMode::Normalize => format!(
2173 "Paragraph could be normalized to use line length of {} characters",
2174 config.line_length.get()
2175 ),
2176 ReflowMode::SentencePerLine => {
2177 let num_sentences = split_into_sentences(¶graph_text).len();
2178 if paragraph_lines.len() == 1 {
2179 // Single line with multiple sentences
2180 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2181 } else {
2182 let num_lines = paragraph_lines.len();
2183 // Multiple lines - could be split sentences or mixed
2184 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
2185 }
2186 },
2187 ReflowMode::SemanticLineBreaks => {
2188 let num_sentences = split_into_sentences(¶graph_text).len();
2189 format!(
2190 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
2191 )
2192 },
2193 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
2194 },
2195 line: warning_line,
2196 column: 1,
2197 end_line: warning_end_line,
2198 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
2199 severity: Severity::Warning,
2200 fix: Some(crate::rule::Fix {
2201 range: byte_range,
2202 replacement,
2203 }),
2204 });
2205 }
2206 }
2207 }
2208
2209 warnings
2210 }
2211
2212 /// Calculate string length based on the configured length mode
2213 fn calculate_string_length(&self, s: &str) -> usize {
2214 match self.config.length_mode {
2215 LengthMode::Chars => s.chars().count(),
2216 LengthMode::Visual => s.width(),
2217 LengthMode::Bytes => s.len(),
2218 }
2219 }
2220
2221 /// Calculate effective line length
2222 ///
2223 /// Returns the actual display length of the line using the configured length mode.
2224 fn calculate_effective_length(&self, line: &str) -> usize {
2225 self.calculate_string_length(line)
2226 }
2227
2228 /// Calculate line length with inline link/image URLs removed.
2229 ///
2230 /// For each inline link `[text](url)` or image `` on the line,
2231 /// computes the "savings" from removing the URL portion (keeping only `[text]`
2232 /// or `![alt]`). Returns `effective_length - total_savings`.
2233 ///
2234 /// Handles nested constructs (e.g., `[](url)`) by only counting the
2235 /// outermost construct to avoid double-counting.
2236 fn calculate_text_only_length(
2237 &self,
2238 effective_length: usize,
2239 line_number: usize,
2240 ctx: &crate::lint_context::LintContext,
2241 ) -> usize {
2242 let line_range = ctx.line_index.line_content_range(line_number);
2243 let line_byte_end = line_range.end;
2244
2245 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
2246 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
2247
2248 for link in &ctx.links {
2249 if link.line != line_number || link.is_reference {
2250 continue;
2251 }
2252 if !matches!(link.link_type, LinkType::Inline) {
2253 continue;
2254 }
2255 // Skip cross-line links
2256 if link.byte_end > line_byte_end {
2257 continue;
2258 }
2259 // `[text]` in configured length mode
2260 let text_only_len = 2 + self.calculate_string_length(&link.text);
2261 constructs.push((link.byte_offset, link.byte_end, text_only_len));
2262 }
2263
2264 for image in &ctx.images {
2265 if image.line != line_number || image.is_reference {
2266 continue;
2267 }
2268 if !matches!(image.link_type, LinkType::Inline) {
2269 continue;
2270 }
2271 // Skip cross-line images
2272 if image.byte_end > line_byte_end {
2273 continue;
2274 }
2275 // `![alt]` in configured length mode
2276 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
2277 constructs.push((image.byte_offset, image.byte_end, text_only_len));
2278 }
2279
2280 if constructs.is_empty() {
2281 return effective_length;
2282 }
2283
2284 // Sort by byte offset to handle overlapping/nested constructs
2285 constructs.sort_by_key(|&(start, _, _)| start);
2286
2287 let mut total_savings: usize = 0;
2288 let mut last_end: usize = 0;
2289
2290 for (start, end, text_only_len) in &constructs {
2291 // Skip constructs nested inside a previously counted one
2292 if *start < last_end {
2293 continue;
2294 }
2295 // Full construct length in configured length mode
2296 let full_source = &ctx.content[*start..*end];
2297 let full_len = self.calculate_string_length(full_source);
2298 total_savings += full_len.saturating_sub(*text_only_len);
2299 last_end = *end;
2300 }
2301
2302 effective_length.saturating_sub(total_savings)
2303 }
2304}