rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod helpers;
22pub mod md013_config;
23use crate::utils::is_template_directive_only;
24use helpers::{
25 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_list_item,
26 is_standalone_link_or_image_line, split_into_segments, trim_preserving_hard_break,
27};
28pub use md013_config::MD013Config;
29use md013_config::{LengthMode, ReflowMode};
30
31#[cfg(test)]
32mod tests;
33use unicode_width::UnicodeWidthStr;
34
/// Rule MD013 (line length): holds the configuration the rule's checks and
/// reflow fixes read from.
///
/// `Default` yields a rule built from `MD013Config::default()`.
35#[derive(Clone, Default)]
36pub struct MD013LineLength {
 /// Rule settings; cloned at the start of every check so inline overrides
 /// can be layered on top without mutating the rule itself.
37 pub(crate) config: MD013Config,
38}
39
40/// Blockquote paragraph line collected for reflow, with original line index for range computation.
41struct CollectedBlockquoteLine {
 /// Zero-based index of the line in the document's line slice.
42 line_idx: usize,
 /// Content of the line as handed to the blockquote reflow helpers
 /// (built with `BlockquoteLineData::explicit` or `BlockquoteLineData::lazy`).
43 data: BlockquoteLineData,
44}
45
46impl MD013LineLength {
47 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
48 Self {
49 config: MD013Config {
50 line_length: crate::types::LineLength::new(line_length),
51 code_blocks,
52 tables,
53 headings,
54 paragraphs: true, // Default to true for backwards compatibility
55 strict,
56 reflow: false,
57 reflow_mode: ReflowMode::default(),
58 length_mode: LengthMode::default(),
59 abbreviations: Vec::new(),
60 },
61 }
62 }
63
64 pub fn from_config_struct(config: MD013Config) -> Self {
65 Self { config }
66 }
67
68 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
69 fn reflow_length_mode(&self) -> ReflowLengthMode {
70 match self.config.length_mode {
71 LengthMode::Chars => ReflowLengthMode::Chars,
72 LengthMode::Visual => ReflowLengthMode::Visual,
73 LengthMode::Bytes => ReflowLengthMode::Bytes,
74 }
75 }
76
77 fn should_ignore_line(
78 &self,
79 line: &str,
80 _lines: &[&str],
81 current_line: usize,
82 ctx: &crate::lint_context::LintContext,
83 ) -> bool {
84 if self.config.strict {
85 return false;
86 }
87
88 // Quick check for common patterns before expensive regex
89 let trimmed = line.trim();
90
91 // Only skip if the entire line is a URL (quick check first)
92 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
93 return true;
94 }
95
96 // Only skip if the entire line is an image reference (quick check first)
97 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
98 return true;
99 }
100
101 // Note: link reference definitions are handled as always-exempt (even in strict mode)
102 // in the main check loop, so they don't need to be checked here.
103
104 // Code blocks with long strings (only check if in code block)
105 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
106 && !trimmed.is_empty()
107 && !line.contains(' ')
108 && !line.contains('\t')
109 {
110 return true;
111 }
112
113 false
114 }
115
116 /// Check if rule should skip based on provided config (used for inline config support)
117 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
118 // Skip if content is empty
119 if ctx.content.is_empty() {
120 return true;
121 }
122
123 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
124 if config.reflow
125 && (config.reflow_mode == ReflowMode::SentencePerLine
126 || config.reflow_mode == ReflowMode::SemanticLineBreaks
127 || config.reflow_mode == ReflowMode::Normalize)
128 {
129 return false;
130 }
131
132 // Quick check: if total content is shorter than line limit, definitely skip
133 if ctx.content.len() <= config.line_length.get() {
134 return true;
135 }
136
137 // Skip if no line exceeds the limit
138 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
139 }
140}
141
142impl Rule for MD013LineLength {
143 fn name(&self) -> &'static str {
144 "MD013"
145 }
146
147 fn description(&self) -> &'static str {
148 "Line length should not be excessive"
149 }
150
151 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
152 // Use pre-parsed inline config from LintContext
153 let config_override = ctx.inline_config().get_rule_config("MD013");
154
155 // Apply configuration override if present
156 let effective_config = if let Some(json_config) = config_override {
157 if let Some(obj) = json_config.as_object() {
158 let mut config = self.config.clone();
159 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
160 config.line_length = crate::types::LineLength::new(line_length as usize);
161 }
162 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
163 config.code_blocks = code_blocks;
164 }
165 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
166 config.tables = tables;
167 }
168 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
169 config.headings = headings;
170 }
171 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
172 config.strict = strict;
173 }
174 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
175 config.reflow = reflow;
176 }
177 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
178 config.reflow_mode = match reflow_mode {
179 "default" => ReflowMode::Default,
180 "normalize" => ReflowMode::Normalize,
181 "sentence-per-line" => ReflowMode::SentencePerLine,
182 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
183 _ => ReflowMode::default(),
184 };
185 }
186 config
187 } else {
188 self.config.clone()
189 }
190 } else {
191 self.config.clone()
192 };
193
194 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
195 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
196 if self.should_skip_with_config(ctx, &effective_config)
197 && !(effective_config.reflow
198 && (effective_config.reflow_mode == ReflowMode::Normalize
199 || effective_config.reflow_mode == ReflowMode::SentencePerLine
200 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
201 {
202 return Ok(Vec::new());
203 }
204
205 // Direct implementation without DocumentStructure
206 let mut warnings = Vec::new();
207
208 // Special handling: line_length = 0 means "no line length limit"
209 // Skip all line length checks, but still allow reflow if enabled
210 let skip_length_checks = effective_config.line_length.is_unlimited();
211
212 // Pre-filter lines that could be problematic to avoid processing all lines
213 let mut candidate_lines = Vec::new();
214 if !skip_length_checks {
215 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
216 // Skip front matter - it should never be linted
217 if line_info.in_front_matter {
218 continue;
219 }
220
221 // Quick length check first
222 if line_info.byte_len > effective_config.line_length.get() {
223 candidate_lines.push(line_idx);
224 }
225 }
226 }
227
228 // If no candidate lines and not in normalize or sentence-per-line mode, early return
229 if candidate_lines.is_empty()
230 && !(effective_config.reflow
231 && (effective_config.reflow_mode == ReflowMode::Normalize
232 || effective_config.reflow_mode == ReflowMode::SentencePerLine
233 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
234 {
235 return Ok(warnings);
236 }
237
238 let lines = ctx.raw_lines();
239
240 // Create a quick lookup set for heading lines
241 // We need this for both the heading skip check AND the paragraphs check
242 let heading_lines_set: std::collections::HashSet<usize> = ctx
243 .lines
244 .iter()
245 .enumerate()
246 .filter(|(_, line)| line.heading.is_some())
247 .map(|(idx, _)| idx + 1)
248 .collect();
249
250 // Use pre-computed table blocks from context
251 // We need this for both the table skip check AND the paragraphs check
252 let table_blocks = &ctx.table_blocks;
253 let mut table_lines_set = std::collections::HashSet::new();
254 for table in table_blocks {
255 table_lines_set.insert(table.header_line + 1);
256 table_lines_set.insert(table.delimiter_line + 1);
257 for &line in &table.content_lines {
258 table_lines_set.insert(line + 1);
259 }
260 }
261
262 // Process candidate lines for line length checks
263 for &line_idx in &candidate_lines {
264 let line_number = line_idx + 1;
265 let line = lines[line_idx];
266
267 // Calculate actual line length (used in warning messages)
268 let effective_length = self.calculate_effective_length(line);
269
270 // Use single line length limit for all content
271 let line_limit = effective_config.line_length.get();
272
273 // In non-strict mode, forgive the trailing non-whitespace run.
274 // If the line only exceeds the limit because of a long token at the end
275 // (URL, link chain, identifier), it passes. This matches markdownlint's
276 // behavior: line.replace(/\S*$/u, "#")
277 let check_length = if effective_config.strict {
278 effective_length
279 } else {
280 match line.rfind(char::is_whitespace) {
281 Some(pos) => {
282 let ws_char = line[pos..].chars().next().unwrap();
283 let prefix_end = pos + ws_char.len_utf8();
284 self.calculate_string_length(&line[..prefix_end]) + 1
285 }
286 None => 1, // No whitespace — entire line is a single token
287 }
288 };
289
290 // Skip lines where the check length is within the limit
291 if check_length <= line_limit {
292 continue;
293 }
294
295 // Semantic link understanding: suppress when excess comes entirely from inline URLs
296 if !effective_config.strict {
297 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
298 if text_only_length <= line_limit {
299 continue;
300 }
301 }
302
303 // Skip mkdocstrings blocks (already handled by LintContext)
304 if ctx.lines[line_idx].in_mkdocstrings {
305 continue;
306 }
307
308 // Link reference definitions are always exempt, even in strict mode.
309 // There's no way to shorten them without breaking the URL.
310 {
311 let trimmed = line.trim();
312 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
313 continue;
314 }
315 }
316
317 // Skip various block types efficiently
318 if !effective_config.strict {
319 // Lines whose only content is a link/image are exempt.
320 // After stripping list markers, blockquote markers, and emphasis,
321 // if only a link or image remains, there is no way to shorten it.
322 if is_standalone_link_or_image_line(line) {
323 continue;
324 }
325
326 // Skip setext heading underlines
327 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
328 continue;
329 }
330
331 // Skip block elements according to config flags
332 // The flags mean: true = check these elements, false = skip these elements
333 // So we skip when the flag is FALSE and the line is in that element type
334 if (!effective_config.headings && heading_lines_set.contains(&line_number))
335 || (!effective_config.code_blocks
336 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
337 || (!effective_config.tables && table_lines_set.contains(&line_number))
338 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
339 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
340 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
341 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
342 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
343 {
344 continue;
345 }
346
347 // Check if this is a paragraph/regular text line
348 // If paragraphs = false, skip lines that are NOT in special blocks
349 if !effective_config.paragraphs {
350 let is_special_block = heading_lines_set.contains(&line_number)
351 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
352 || table_lines_set.contains(&line_number)
353 || ctx.lines[line_number - 1].blockquote.is_some()
354 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
355 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
356 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
357 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
358 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
359 || ctx
360 .line_info(line_number)
361 .is_some_and(|info| info.in_mkdocs_container());
362
363 // Skip regular paragraph text when paragraphs = false
364 if !is_special_block {
365 continue;
366 }
367 }
368
369 // Skip lines that are only a URL, image ref, or link ref
370 if self.should_ignore_line(line, lines, line_idx, ctx) {
371 continue;
372 }
373 }
374
375 // In sentence-per-line mode, check if this is a single long sentence
376 // If so, emit a warning without a fix (user must manually rephrase)
377 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
378 let sentences = split_into_sentences(line.trim());
379 if sentences.len() == 1 {
380 // Single sentence that's too long - warn but don't auto-fix
381 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
382
383 let (start_line, start_col, end_line, end_col) =
384 calculate_excess_range(line_number, line, line_limit);
385
386 warnings.push(LintWarning {
387 rule_name: Some(self.name().to_string()),
388 message,
389 line: start_line,
390 column: start_col,
391 end_line,
392 end_column: end_col,
393 severity: Severity::Warning,
394 fix: None, // No auto-fix for long single sentences
395 });
396 continue;
397 }
398 // Multiple sentences will be handled by paragraph-based reflow
399 continue;
400 }
401
402 // In semantic-line-breaks mode, skip per-line checks —
403 // all reflow is handled at the paragraph level with cascading splits
404 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
405 continue;
406 }
407
408 // Don't provide fix for individual lines when reflow is enabled
409 // Paragraph-based fixes will be handled separately
410 let fix = None;
411
412 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
413
414 // Calculate precise character range for the excess portion
415 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
416
417 warnings.push(LintWarning {
418 rule_name: Some(self.name().to_string()),
419 message,
420 line: start_line,
421 column: start_col,
422 end_line,
423 end_column: end_col,
424 severity: Severity::Warning,
425 fix,
426 });
427 }
428
429 // If reflow is enabled, generate paragraph-based fixes
430 if effective_config.reflow {
431 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
432 // Merge paragraph warnings with line warnings, removing duplicates
433 for pw in paragraph_warnings {
434 // Remove any line warnings that overlap with this paragraph
435 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
436 warnings.push(pw);
437 }
438 }
439
440 Ok(warnings)
441 }
442
443 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
444 // For CLI usage, apply fixes from warnings
445 // LSP will use the warning-based fixes directly
446 let warnings = self.check(ctx)?;
447
448 // If there are no fixes, return content unchanged
449 if !warnings.iter().any(|w| w.fix.is_some()) {
450 return Ok(ctx.content.to_string());
451 }
452
453 // Apply warning-based fixes
454 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
455 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
456 }
457
458 fn as_any(&self) -> &dyn std::any::Any {
459 self
460 }
461
462 fn category(&self) -> RuleCategory {
463 RuleCategory::Whitespace
464 }
465
466 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
467 self.should_skip_with_config(ctx, &self.config)
468 }
469
470 fn default_config_section(&self) -> Option<(String, toml::Value)> {
471 let default_config = MD013Config::default();
472 let json_value = serde_json::to_value(&default_config).ok()?;
473 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
474
475 if let toml::Value::Table(table) = toml_value {
476 if !table.is_empty() {
477 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
478 } else {
479 None
480 }
481 } else {
482 None
483 }
484 }
485
486 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
487 let mut aliases = std::collections::HashMap::new();
488 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
489 Some(aliases)
490 }
491
492 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
493 where
494 Self: Sized,
495 {
496 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
497 // Use global line_length if rule-specific config still has default value
498 if rule_config.line_length.get() == 80 {
499 rule_config.line_length = config.global.line_length;
500 }
501 Box::new(Self::from_config_struct(rule_config))
502 }
503}
504
505impl MD013LineLength {
506 fn is_blockquote_content_boundary(
507 &self,
508 content: &str,
509 line_num: usize,
510 ctx: &crate::lint_context::LintContext,
511 ) -> bool {
512 let trimmed = content.trim();
513
514 trimmed.is_empty()
515 || ctx.line_info(line_num).is_some_and(|info| {
516 info.in_code_block
517 || info.in_front_matter
518 || info.in_html_block
519 || info.in_html_comment
520 || info.in_esm_block
521 || info.in_jsx_expression
522 || info.in_mdx_comment
523 || info.in_mkdocstrings
524 || info.in_mkdocs_container()
525 || info.is_div_marker
526 })
527 || trimmed.starts_with('#')
528 || trimmed.starts_with("```")
529 || trimmed.starts_with("~~~")
530 || trimmed.starts_with('>')
531 || TableUtils::is_potential_table_row(content)
532 || is_list_item(trimmed)
533 || is_horizontal_rule(trimmed)
534 || (trimmed.starts_with('[') && content.contains("]:"))
535 || is_template_directive_only(content)
536 || is_standalone_attr_list(content)
537 || is_snippet_block_delimiter(content)
538 || is_github_alert_marker(trimmed)
539 }
540
541 fn generate_blockquote_paragraph_fix(
542 &self,
543 ctx: &crate::lint_context::LintContext,
544 config: &MD013Config,
545 lines: &[&str],
546 line_index: &LineIndex,
547 start_idx: usize,
548 line_ending: &str,
549 ) -> (Option<LintWarning>, usize) {
550 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
551 return (None, start_idx + 1);
552 };
553 let target_level = start_bq.nesting_level;
554
555 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
556 let mut i = start_idx;
557
558 while i < lines.len() {
559 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
560 break;
561 }
562
563 let line_num = i + 1;
564 if line_num > ctx.lines.len() {
565 break;
566 }
567
568 if lines[i].trim().is_empty() {
569 break;
570 }
571
572 let line_bq = ctx.lines[i].blockquote.as_deref();
573 if let Some(bq) = line_bq {
574 if bq.nesting_level != target_level {
575 break;
576 }
577
578 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
579 break;
580 }
581
582 collected.push(CollectedBlockquoteLine {
583 line_idx: i,
584 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
585 });
586 i += 1;
587 continue;
588 }
589
590 let lazy_content = lines[i].trim_start();
591 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
592 break;
593 }
594
595 collected.push(CollectedBlockquoteLine {
596 line_idx: i,
597 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
598 });
599 i += 1;
600 }
601
602 if collected.is_empty() {
603 return (None, start_idx + 1);
604 }
605
606 let next_idx = i;
607 let paragraph_start = collected[0].line_idx;
608 let end_line = collected[collected.len() - 1].line_idx;
609 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
610 let paragraph_text = line_data
611 .iter()
612 .map(|d| d.content.as_str())
613 .collect::<Vec<_>>()
614 .join(" ");
615
616 let contains_definition_list = line_data
617 .iter()
618 .any(|d| crate::utils::is_definition_list_item(&d.content));
619 if contains_definition_list {
620 return (None, next_idx);
621 }
622
623 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
624 if contains_snippets {
625 return (None, next_idx);
626 }
627
628 let needs_reflow = match config.reflow_mode {
629 ReflowMode::Normalize => line_data.len() > 1,
630 ReflowMode::SentencePerLine => {
631 let sentences = split_into_sentences(¶graph_text);
632 sentences.len() > 1 || line_data.len() > 1
633 }
634 ReflowMode::SemanticLineBreaks => {
635 let sentences = split_into_sentences(¶graph_text);
636 sentences.len() > 1
637 || line_data.len() > 1
638 || collected
639 .iter()
640 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
641 }
642 ReflowMode::Default => collected
643 .iter()
644 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
645 };
646
647 if !needs_reflow {
648 return (None, next_idx);
649 }
650
651 let fallback_prefix = start_bq.prefix.clone();
652 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
653 let continuation_style = blockquote_continuation_style(&line_data);
654
655 let reflow_line_length = if config.line_length.is_unlimited() {
656 usize::MAX
657 } else {
658 config
659 .line_length
660 .get()
661 .saturating_sub(self.calculate_string_length(&explicit_prefix))
662 .max(1)
663 };
664
665 let reflow_options = crate::utils::text_reflow::ReflowOptions {
666 line_length: reflow_line_length,
667 break_on_sentences: true,
668 preserve_breaks: false,
669 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
670 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
671 abbreviations: config.abbreviations_for_reflow(),
672 length_mode: self.reflow_length_mode(),
673 };
674
675 let reflowed_with_style =
676 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
677
678 if reflowed_with_style.is_empty() {
679 return (None, next_idx);
680 }
681
682 let reflowed_text = reflowed_with_style.join(line_ending);
683
684 let start_range = line_index.whole_line_range(paragraph_start + 1);
685 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
686 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
687 } else {
688 line_index.whole_line_range(end_line + 1)
689 };
690 let byte_range = start_range.start..end_range.end;
691
692 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
693 format!("{reflowed_text}{line_ending}")
694 } else {
695 reflowed_text
696 };
697
698 let original_text = &ctx.content[byte_range.clone()];
699 if original_text == replacement {
700 return (None, next_idx);
701 }
702
703 let (warning_line, warning_end_line) = match config.reflow_mode {
704 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
705 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
706 ReflowMode::Default => {
707 let violating_line = collected
708 .iter()
709 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
710 .map(|line| line.line_idx + 1)
711 .unwrap_or(paragraph_start + 1);
712 (violating_line, violating_line)
713 }
714 };
715
716 let warning = LintWarning {
717 rule_name: Some(self.name().to_string()),
718 message: match config.reflow_mode {
719 ReflowMode::Normalize => format!(
720 "Paragraph could be normalized to use line length of {} characters",
721 config.line_length.get()
722 ),
723 ReflowMode::SentencePerLine => {
724 let num_sentences = split_into_sentences(¶graph_text).len();
725 if line_data.len() == 1 {
726 format!("Line contains {num_sentences} sentences (one sentence per line required)")
727 } else {
728 let num_lines = line_data.len();
729 format!(
730 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
731 )
732 }
733 }
734 ReflowMode::SemanticLineBreaks => {
735 let num_sentences = split_into_sentences(¶graph_text).len();
736 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
737 }
738 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
739 },
740 line: warning_line,
741 column: 1,
742 end_line: warning_end_line,
743 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
744 severity: Severity::Warning,
745 fix: Some(crate::rule::Fix {
746 range: byte_range,
747 replacement,
748 }),
749 };
750
751 (Some(warning), next_idx)
752 }
753
754 /// Generate paragraph-based fixes
755 fn generate_paragraph_fixes(
756 &self,
757 ctx: &crate::lint_context::LintContext,
758 config: &MD013Config,
759 lines: &[&str],
760 ) -> Vec<LintWarning> {
761 let mut warnings = Vec::new();
762 let line_index = LineIndex::new(ctx.content);
763
764 // Detect the content's line ending style to preserve it in replacements.
765 // The LSP receives content from editors which may use CRLF (Windows).
766 // Replacements must match the original line endings to avoid false positives.
767 let line_ending = crate::utils::line_ending::detect_line_ending(ctx.content);
768
769 let mut i = 0;
770 while i < lines.len() {
771 let line_num = i + 1;
772
773 // Handle blockquote paragraphs with style-preserving reflow.
774 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
775 let (warning, next_idx) =
776 self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i, line_ending);
777 if let Some(warning) = warning {
778 warnings.push(warning);
779 }
780 i = next_idx;
781 continue;
782 }
783
784 // Skip special structures (but NOT MkDocs containers - those get special handling)
785 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
786 info.in_code_block
787 || info.in_front_matter
788 || info.in_html_block
789 || info.in_html_comment
790 || info.in_esm_block
791 || info.in_jsx_expression
792 || info.in_mdx_comment
793 || info.in_mkdocstrings
794 });
795
796 if should_skip_due_to_line_info
797 || lines[i].trim().starts_with('#')
798 || TableUtils::is_potential_table_row(lines[i])
799 || lines[i].trim().is_empty()
800 || is_horizontal_rule(lines[i].trim())
801 || is_template_directive_only(lines[i])
802 || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
803 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
804 {
805 i += 1;
806 continue;
807 }
808
809 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
810 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
811 // Skip admonition/tab marker lines — only reflow their indented content
812 let current_line = lines[i];
813 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
814 i += 1;
815 continue;
816 }
817
818 let container_start = i;
819
820 // Detect the actual indent level from the first content line
821 // (supports nested admonitions with 8+ spaces)
822 let first_line = lines[i];
823 let base_indent_len = first_line.len() - first_line.trim_start().len();
824 let base_indent: String = " ".repeat(base_indent_len);
825
826 // Collect consecutive MkDocs container paragraph lines
827 let mut container_lines: Vec<&str> = Vec::new();
828 while i < lines.len() {
829 let current_line_num = i + 1;
830 let line_info = ctx.line_info(current_line_num);
831
832 // Stop if we leave the MkDocs container
833 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
834 break;
835 }
836
837 let line = lines[i];
838
839 // Stop at paragraph boundaries within the container
840 if line.trim().is_empty() {
841 break;
842 }
843
844 // Skip list items, code blocks, headings within containers
845 if is_list_item(line.trim())
846 || line.trim().starts_with("```")
847 || line.trim().starts_with("~~~")
848 || line.trim().starts_with('#')
849 {
850 break;
851 }
852
853 container_lines.push(line);
854 i += 1;
855 }
856
857 if container_lines.is_empty() {
858 // Must advance i to avoid infinite loop when we encounter
859 // non-paragraph content (code block, list, heading, empty line)
860 // at the start of an MkDocs container
861 i += 1;
862 continue;
863 }
864
865 // Strip the base indent from each line and join for reflow
866 let stripped_lines: Vec<&str> = container_lines
867 .iter()
868 .map(|line| {
869 if line.starts_with(&base_indent) {
870 &line[base_indent_len..]
871 } else {
872 line.trim_start()
873 }
874 })
875 .collect();
876 let paragraph_text = stripped_lines.join(" ");
877
878 // Check if reflow is needed
879 let needs_reflow = match config.reflow_mode {
880 ReflowMode::Normalize => container_lines.len() > 1,
881 ReflowMode::SentencePerLine => {
882 let sentences = split_into_sentences(¶graph_text);
883 sentences.len() > 1 || container_lines.len() > 1
884 }
885 ReflowMode::SemanticLineBreaks => {
886 let sentences = split_into_sentences(¶graph_text);
887 sentences.len() > 1
888 || container_lines.len() > 1
889 || container_lines
890 .iter()
891 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
892 }
893 ReflowMode::Default => container_lines
894 .iter()
895 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
896 };
897
898 if !needs_reflow {
899 continue;
900 }
901
902 // Calculate byte range for this container paragraph
903 let start_range = line_index.whole_line_range(container_start + 1);
904 let end_line = container_start + container_lines.len() - 1;
905 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
906 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
907 } else {
908 line_index.whole_line_range(end_line + 1)
909 };
910 let byte_range = start_range.start..end_range.end;
911
912 // Reflow with adjusted line length (accounting for the 4-space indent)
913 let reflow_line_length = if config.line_length.is_unlimited() {
914 usize::MAX
915 } else {
916 config.line_length.get().saturating_sub(base_indent_len).max(1)
917 };
918 let reflow_options = crate::utils::text_reflow::ReflowOptions {
919 line_length: reflow_line_length,
920 break_on_sentences: true,
921 preserve_breaks: false,
922 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
923 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
924 abbreviations: config.abbreviations_for_reflow(),
925 length_mode: self.reflow_length_mode(),
926 };
927 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
928
929 // Re-add the 4-space indent to each reflowed line
930 let reflowed_with_indent: Vec<String> =
931 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
932 let reflowed_text = reflowed_with_indent.join(line_ending);
933
934 // Preserve trailing newline
935 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
936 format!("{reflowed_text}{line_ending}")
937 } else {
938 reflowed_text
939 };
940
941 // Only generate a warning if the replacement is different
942 let original_text = &ctx.content[byte_range.clone()];
943 if original_text != replacement {
944 warnings.push(LintWarning {
945 rule_name: Some(self.name().to_string()),
946 message: format!(
947 "Line length {} exceeds {} characters (in MkDocs container)",
948 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
949 config.line_length.get()
950 ),
951 line: container_start + 1,
952 column: 1,
953 end_line: end_line + 1,
954 end_column: lines[end_line].len() + 1,
955 severity: Severity::Warning,
956 fix: Some(crate::rule::Fix {
957 range: byte_range,
958 replacement,
959 }),
960 });
961 }
962 continue;
963 }
964
// Detects lines that open with an admonition-style label (NOTE:, WARNING:, ...).
// Leading whitespace is ignored; the label must match exactly, including case
// and the trailing colon. Written as a nested `fn` rather than a closure because
// it captures nothing, so it does not need to be rebuilt on every loop pass.
fn is_semantic_line(content: &str) -> bool {
    // Labels that mark a line as "semantic".
    const SEMANTIC_MARKERS: [&str; 8] = [
        "NOTE:",
        "WARNING:",
        "IMPORTANT:",
        "CAUTION:",
        "TIP:",
        "DANGER:",
        "HINT:",
        "INFO:",
    ];

    let candidate = content.trim_start();
    SEMANTIC_MARKERS.iter().any(|marker| candidate.starts_with(marker))
}
980
// Detects a fenced-code delimiter (opening or closing): after leading whitespace
// the line starts with three backticks or three tildes. Anything following the
// fence characters (e.g. an info string) is ignored. Written as a nested `fn`
// rather than a closure because it captures nothing.
fn is_fence_marker(content: &str) -> bool {
    const FENCES: [&str; 2] = ["```", "~~~"];

    let candidate = content.trim_start();
    FENCES.iter().any(|fence| candidate.starts_with(fence))
}
986
987 // Check if this is a list item - handle it specially
988 let trimmed = lines[i].trim();
989 if is_list_item(trimmed) {
990 // Collect the entire list item including continuation lines
991 let list_start = i;
992 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
993 let marker_len = marker.len();
994
/// Classification of one line collected for the current list item.
///
/// The item's first line (text after the list marker) is always recorded as
/// `Content`; each continuation line is classified while the item is collected.
#[derive(Clone)]
enum LineType {
    /// Paragraph text with the list indentation removed.
    Content(String),
    /// Line preserved as code: a line flagged `in_code_block`, a fence marker, or a
    /// line indented more than `marker_len + 3`. Holds the content and original indent.
    CodeBlock(String, usize),
    /// Nested list item: the line content after its indentation, and the original indent.
    NestedListItem(String, usize),
    /// Line starting with NOTE:, WARNING:, etc. that should stay separate.
    SemanticLine(String),
    /// MkDocs Snippets delimiter (-8<-) that must stay on its own line.
    SnippetLine(String),
    /// Quarto/Pandoc div marker (::: opening or closing).
    DivMarker(String),
    /// Blank line that is followed by a line indented enough to continue the item.
    Empty,
}
1006
1007 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1008 i += 1;
1009
1010 // Collect continuation lines using ctx.lines for metadata
1011 while i < lines.len() {
1012 let line_info = &ctx.lines[i];
1013
1014 // Use pre-computed is_blank from ctx
1015 if line_info.is_blank {
1016 // Empty line - check if next line is indented (part of list item)
1017 if i + 1 < lines.len() {
1018 let next_info = &ctx.lines[i + 1];
1019
1020 // Check if next line is indented enough to be continuation
1021 if !next_info.is_blank && next_info.indent >= marker_len {
1022 // This blank line is between paragraphs/blocks in the list item
1023 list_item_lines.push(LineType::Empty);
1024 i += 1;
1025 continue;
1026 }
1027 }
1028 // No indented line after blank, end of list item
1029 break;
1030 }
1031
1032 // Use pre-computed indent from ctx
1033 let indent = line_info.indent;
1034
1035 // Valid continuation must be indented at least marker_len
1036 if indent >= marker_len {
1037 let trimmed = line_info.content(ctx.content).trim();
1038
1039 // Use pre-computed in_code_block from ctx
1040 if line_info.in_code_block {
1041 list_item_lines.push(LineType::CodeBlock(
1042 line_info.content(ctx.content)[indent..].to_string(),
1043 indent,
1044 ));
1045 i += 1;
1046 continue;
1047 }
1048
1049 // Check if this is a SIBLING list item (breaks parent)
1050 // Nested lists are indented >= marker_len and are PART of the parent item
1051 // Siblings are at indent < marker_len (at or before parent marker)
1052 if is_list_item(trimmed) && indent < marker_len {
1053 // This is a sibling item at same or higher level - end parent item
1054 break;
1055 }
1056
1057 // Check if this is a NESTED list item marker
1058 // Nested lists should be processed separately UNLESS they're part of a
1059 // multi-paragraph list item (indicated by a blank line before them OR
1060 // it's a continuation of an already-started nested list)
1061 if is_list_item(trimmed) && indent >= marker_len {
1062 // Check if there was a blank line before this (multi-paragraph context)
1063 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
1064
1065 // Check if we've already seen nested list content (another nested item)
1066 let has_nested_content = list_item_lines.iter().any(|line| {
1067 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
1068 || matches!(line, LineType::NestedListItem(_, _))
1069 });
1070
1071 if !has_blank_before && !has_nested_content {
1072 // Single-paragraph context with no prior nested items: starts a new item
1073 // End parent collection; nested list will be processed next
1074 break;
1075 }
1076 // else: multi-paragraph context or continuation of nested list, keep collecting
1077 // Mark this as a nested list item to preserve its structure
1078 list_item_lines.push(LineType::NestedListItem(
1079 line_info.content(ctx.content)[indent..].to_string(),
1080 indent,
1081 ));
1082 i += 1;
1083 continue;
1084 }
1085
1086 // Normal continuation: marker_len to marker_len+3
1087 if indent <= marker_len + 3 {
1088 // Extract content (remove indentation and trailing whitespace)
1089 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1090 // See: https://github.com/rvben/rumdl/issues/76
1091 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1092
1093 // Check if this is a div marker (::: opening or closing)
1094 // These must be preserved on their own line, not merged into paragraphs
1095 if line_info.is_div_marker {
1096 list_item_lines.push(LineType::DivMarker(content));
1097 }
1098 // Check if this is a fence marker (opening or closing)
1099 // These should be treated as code block lines, not paragraph content
1100 else if is_fence_marker(&content) {
1101 list_item_lines.push(LineType::CodeBlock(content, indent));
1102 }
1103 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1104 else if is_semantic_line(&content) {
1105 list_item_lines.push(LineType::SemanticLine(content));
1106 }
1107 // Check if this is a snippet block delimiter (-8<- or --8<--)
1108 // These must be preserved on their own lines for MkDocs Snippets extension
1109 else if is_snippet_block_delimiter(&content) {
1110 list_item_lines.push(LineType::SnippetLine(content));
1111 } else {
1112 list_item_lines.push(LineType::Content(content));
1113 }
1114 i += 1;
1115 } else {
1116 // indent >= marker_len + 4: indented code block
1117 list_item_lines.push(LineType::CodeBlock(
1118 line_info.content(ctx.content)[indent..].to_string(),
1119 indent,
1120 ));
1121 i += 1;
1122 }
1123 } else {
1124 // Not indented enough, end of list item
1125 break;
1126 }
1127 }
1128
1129 let indent_size = marker_len;
1130 let expected_indent = " ".repeat(indent_size);
1131
/// Structural unit of a list item, built by grouping consecutive `list_item_lines`
/// entries (paragraphs, code blocks, nested lists, semantic lines, snippet lines,
/// div markers, and HTML blocks).
#[derive(Clone)]
enum Block {
    /// Consecutive content lines forming one paragraph.
    Paragraph(Vec<String>),
    /// Code lines kept with their original indentation.
    Code {
        /// (content, indent) pairs.
        lines: Vec<(String, usize)>,
        /// Whether there was a blank line before this block.
        has_preceding_blank: bool,
    },
    /// (content, indent) pairs for nested list items.
    NestedList(Vec<(String, usize)>),
    /// Semantic marker line (NOTE:, WARNING:, ...) that stays on its own line.
    SemanticLine(String),
    /// MkDocs Snippets delimiter that stays on its own line without extra spacing.
    SnippetLine(String),
    /// Quarto/Pandoc div marker (::: opening or closing) preserved on its own line.
    DivMarker(String),
    /// HTML block whose lines are carried through unchanged.
    Html {
        /// HTML content preserved exactly as-is.
        lines: Vec<String>,
        /// Whether there was a blank line before this block.
        has_preceding_blank: bool,
    },
}
1149
// HTML tag detection helpers

/// Lowercase names of the HTML elements that start an HTML block inside a list item.
/// Tag names are lowercased before being looked up in this table.
#[rustfmt::skip] // keep the per-category grouping below
const BLOCK_LEVEL_TAGS: &[&str] = &[
    // Containers and sectioning
    "div", "details", "summary", "section", "article",
    "header", "footer", "nav", "aside", "main",
    // Tables
    "table", "thead", "tbody", "tfoot", "tr", "td", "th",
    // Lists
    "ul", "ol", "li", "dl", "dt", "dd",
    // Preformatted text, quotes and figures
    "pre", "blockquote", "figure", "figcaption",
    // Forms
    "form", "fieldset", "legend",
    // Rules, paragraphs and headings
    "hr", "p", "h1", "h2", "h3", "h4", "h5", "h6",
    // Style and script content
    "style", "script", "noscript",
];
1195
1196 fn is_block_html_opening_tag(line: &str) -> Option<String> {
1197 let trimmed = line.trim();
1198
1199 // Check for HTML comments
1200 if trimmed.starts_with("<!--") {
1201 return Some("!--".to_string());
1202 }
1203
1204 // Check for opening tags
1205 if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
1206 // Extract tag name from <tagname ...> or <tagname>
1207 let after_bracket = &trimmed[1..];
1208 if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
1209 let tag_name = after_bracket[..end].to_lowercase();
1210
1211 // Only treat as block if it's a known block-level tag
1212 if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
1213 return Some(tag_name);
1214 }
1215 }
1216 }
1217 None
1218 }
1219
/// Reports whether `line` closes the HTML construct identified by `tag_name`.
///
/// * For the comment pseudo tag `"!--"`, the line must end with `-->`.
/// * Otherwise the line must start with `</`, optionally followed by whitespace,
///   then a tag name equal to `tag_name`. The name ends at the first whitespace,
///   at `>`, or at the end of the line.
///
/// The name is compared as a whole and without regard to ASCII case. A plain
/// prefix match would let `</pre>` close `p` or `</thead>` close `th`, and a
/// case-sensitive match would never close `<DIV>` with `</DIV>`, because the
/// opening tag name is lowercased when it is recorded.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let Some(after_slash) = trimmed.strip_prefix("</") else {
        return false;
    };

    // Tolerate whitespace between "</" and the name.
    let candidate = after_slash.trim_start();
    let name_end = candidate
        .find(|c: char| c.is_whitespace() || c == '>')
        .unwrap_or(candidate.len());

    candidate[..name_end].eq_ignore_ascii_case(tag_name)
}
1233
/// Reports whether `line`, ignoring trailing whitespace, ends with `/>`.
fn is_self_closing_tag(line: &str) -> bool {
    line.trim_end().ends_with("/>")
}
1238
1239 let mut blocks: Vec<Block> = Vec::new();
1240 let mut current_paragraph: Vec<String> = Vec::new();
1241 let mut current_code_block: Vec<(String, usize)> = Vec::new();
1242 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
1243 let mut current_html_block: Vec<String> = Vec::new();
1244 let mut html_tag_stack: Vec<String> = Vec::new();
1245 let mut in_code = false;
1246 let mut in_nested_list = false;
1247 let mut in_html_block = false;
1248 let mut had_preceding_blank = false; // Track if we just saw an empty line
1249 let mut code_block_has_preceding_blank = false; // Track blank before current code block
1250 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
1251
1252 for line in &list_item_lines {
1253 match line {
1254 LineType::Empty => {
1255 if in_code {
1256 current_code_block.push((String::new(), 0));
1257 } else if in_nested_list {
1258 current_nested_list.push((String::new(), 0));
1259 } else if in_html_block {
1260 // Allow blank lines inside HTML blocks
1261 current_html_block.push(String::new());
1262 } else if !current_paragraph.is_empty() {
1263 blocks.push(Block::Paragraph(current_paragraph.clone()));
1264 current_paragraph.clear();
1265 }
1266 // Mark that we saw a blank line
1267 had_preceding_blank = true;
1268 }
1269 LineType::Content(content) => {
1270 // Check if we're currently in an HTML block
1271 if in_html_block {
1272 current_html_block.push(content.clone());
1273
1274 // Check if this line closes any open HTML tags
1275 if let Some(last_tag) = html_tag_stack.last() {
1276 if is_html_closing_tag(content, last_tag) {
1277 html_tag_stack.pop();
1278
1279 // If stack is empty, HTML block is complete
1280 if html_tag_stack.is_empty() {
1281 blocks.push(Block::Html {
1282 lines: current_html_block.clone(),
1283 has_preceding_blank: html_block_has_preceding_blank,
1284 });
1285 current_html_block.clear();
1286 in_html_block = false;
1287 }
1288 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1289 // Nested opening tag within HTML block
1290 if !is_self_closing_tag(content) {
1291 html_tag_stack.push(new_tag);
1292 }
1293 }
1294 }
1295 had_preceding_blank = false;
1296 } else {
1297 // Not in HTML block - check if this line starts one
1298 if let Some(tag_name) = is_block_html_opening_tag(content) {
1299 // Flush current paragraph before starting HTML block
1300 if in_code {
1301 blocks.push(Block::Code {
1302 lines: current_code_block.clone(),
1303 has_preceding_blank: code_block_has_preceding_blank,
1304 });
1305 current_code_block.clear();
1306 in_code = false;
1307 } else if in_nested_list {
1308 blocks.push(Block::NestedList(current_nested_list.clone()));
1309 current_nested_list.clear();
1310 in_nested_list = false;
1311 } else if !current_paragraph.is_empty() {
1312 blocks.push(Block::Paragraph(current_paragraph.clone()));
1313 current_paragraph.clear();
1314 }
1315
1316 // Start new HTML block
1317 in_html_block = true;
1318 html_block_has_preceding_blank = had_preceding_blank;
1319 current_html_block.push(content.clone());
1320
1321 // Check if it's self-closing or needs a closing tag
1322 if is_self_closing_tag(content) {
1323 // Self-closing tag - complete the HTML block immediately
1324 blocks.push(Block::Html {
1325 lines: current_html_block.clone(),
1326 has_preceding_blank: html_block_has_preceding_blank,
1327 });
1328 current_html_block.clear();
1329 in_html_block = false;
1330 } else {
1331 // Regular opening tag - push to stack
1332 html_tag_stack.push(tag_name);
1333 }
1334 } else {
1335 // Regular content line - add to paragraph
1336 if in_code {
1337 // Switching from code to content
1338 blocks.push(Block::Code {
1339 lines: current_code_block.clone(),
1340 has_preceding_blank: code_block_has_preceding_blank,
1341 });
1342 current_code_block.clear();
1343 in_code = false;
1344 } else if in_nested_list {
1345 // Switching from nested list to content
1346 blocks.push(Block::NestedList(current_nested_list.clone()));
1347 current_nested_list.clear();
1348 in_nested_list = false;
1349 }
1350 current_paragraph.push(content.clone());
1351 }
1352 had_preceding_blank = false; // Reset after content
1353 }
1354 }
1355 LineType::CodeBlock(content, indent) => {
1356 if in_nested_list {
1357 // Switching from nested list to code
1358 blocks.push(Block::NestedList(current_nested_list.clone()));
1359 current_nested_list.clear();
1360 in_nested_list = false;
1361 } else if in_html_block {
1362 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1363 blocks.push(Block::Html {
1364 lines: current_html_block.clone(),
1365 has_preceding_blank: html_block_has_preceding_blank,
1366 });
1367 current_html_block.clear();
1368 html_tag_stack.clear();
1369 in_html_block = false;
1370 }
1371 if !in_code {
1372 // Switching from content to code
1373 if !current_paragraph.is_empty() {
1374 blocks.push(Block::Paragraph(current_paragraph.clone()));
1375 current_paragraph.clear();
1376 }
1377 in_code = true;
1378 // Record whether there was a blank line before this code block
1379 code_block_has_preceding_blank = had_preceding_blank;
1380 }
1381 current_code_block.push((content.clone(), *indent));
1382 had_preceding_blank = false; // Reset after code
1383 }
1384 LineType::NestedListItem(content, indent) => {
1385 if in_code {
1386 // Switching from code to nested list
1387 blocks.push(Block::Code {
1388 lines: current_code_block.clone(),
1389 has_preceding_blank: code_block_has_preceding_blank,
1390 });
1391 current_code_block.clear();
1392 in_code = false;
1393 } else if in_html_block {
1394 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1395 blocks.push(Block::Html {
1396 lines: current_html_block.clone(),
1397 has_preceding_blank: html_block_has_preceding_blank,
1398 });
1399 current_html_block.clear();
1400 html_tag_stack.clear();
1401 in_html_block = false;
1402 }
1403 if !in_nested_list {
1404 // Switching from content to nested list
1405 if !current_paragraph.is_empty() {
1406 blocks.push(Block::Paragraph(current_paragraph.clone()));
1407 current_paragraph.clear();
1408 }
1409 in_nested_list = true;
1410 }
1411 current_nested_list.push((content.clone(), *indent));
1412 had_preceding_blank = false; // Reset after nested list
1413 }
1414 LineType::SemanticLine(content) => {
1415 // Semantic lines are standalone - flush any current block and add as separate block
1416 if in_code {
1417 blocks.push(Block::Code {
1418 lines: current_code_block.clone(),
1419 has_preceding_blank: code_block_has_preceding_blank,
1420 });
1421 current_code_block.clear();
1422 in_code = false;
1423 } else if in_nested_list {
1424 blocks.push(Block::NestedList(current_nested_list.clone()));
1425 current_nested_list.clear();
1426 in_nested_list = false;
1427 } else if in_html_block {
1428 blocks.push(Block::Html {
1429 lines: current_html_block.clone(),
1430 has_preceding_blank: html_block_has_preceding_blank,
1431 });
1432 current_html_block.clear();
1433 html_tag_stack.clear();
1434 in_html_block = false;
1435 } else if !current_paragraph.is_empty() {
1436 blocks.push(Block::Paragraph(current_paragraph.clone()));
1437 current_paragraph.clear();
1438 }
1439 // Add semantic line as its own block
1440 blocks.push(Block::SemanticLine(content.clone()));
1441 had_preceding_blank = false; // Reset after semantic line
1442 }
1443 LineType::SnippetLine(content) => {
1444 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1445 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1446 if in_code {
1447 blocks.push(Block::Code {
1448 lines: current_code_block.clone(),
1449 has_preceding_blank: code_block_has_preceding_blank,
1450 });
1451 current_code_block.clear();
1452 in_code = false;
1453 } else if in_nested_list {
1454 blocks.push(Block::NestedList(current_nested_list.clone()));
1455 current_nested_list.clear();
1456 in_nested_list = false;
1457 } else if in_html_block {
1458 blocks.push(Block::Html {
1459 lines: current_html_block.clone(),
1460 has_preceding_blank: html_block_has_preceding_blank,
1461 });
1462 current_html_block.clear();
1463 html_tag_stack.clear();
1464 in_html_block = false;
1465 } else if !current_paragraph.is_empty() {
1466 blocks.push(Block::Paragraph(current_paragraph.clone()));
1467 current_paragraph.clear();
1468 }
1469 // Add snippet line as its own block
1470 blocks.push(Block::SnippetLine(content.clone()));
1471 had_preceding_blank = false;
1472 }
1473 LineType::DivMarker(content) => {
1474 // Div markers (::: opening or closing) are standalone structural delimiters
1475 // Flush any current block and add as separate block
1476 if in_code {
1477 blocks.push(Block::Code {
1478 lines: current_code_block.clone(),
1479 has_preceding_blank: code_block_has_preceding_blank,
1480 });
1481 current_code_block.clear();
1482 in_code = false;
1483 } else if in_nested_list {
1484 blocks.push(Block::NestedList(current_nested_list.clone()));
1485 current_nested_list.clear();
1486 in_nested_list = false;
1487 } else if in_html_block {
1488 blocks.push(Block::Html {
1489 lines: current_html_block.clone(),
1490 has_preceding_blank: html_block_has_preceding_blank,
1491 });
1492 current_html_block.clear();
1493 html_tag_stack.clear();
1494 in_html_block = false;
1495 } else if !current_paragraph.is_empty() {
1496 blocks.push(Block::Paragraph(current_paragraph.clone()));
1497 current_paragraph.clear();
1498 }
1499 blocks.push(Block::DivMarker(content.clone()));
1500 had_preceding_blank = false;
1501 }
1502 }
1503 }
1504
1505 // Push remaining block
1506 if in_code && !current_code_block.is_empty() {
1507 blocks.push(Block::Code {
1508 lines: current_code_block,
1509 has_preceding_blank: code_block_has_preceding_blank,
1510 });
1511 } else if in_nested_list && !current_nested_list.is_empty() {
1512 blocks.push(Block::NestedList(current_nested_list));
1513 } else if in_html_block && !current_html_block.is_empty() {
1514 // If we still have an unclosed HTML block, push it anyway
1515 // (malformed HTML - missing closing tag)
1516 blocks.push(Block::Html {
1517 lines: current_html_block,
1518 has_preceding_blank: html_block_has_preceding_blank,
1519 });
1520 } else if !current_paragraph.is_empty() {
1521 blocks.push(Block::Paragraph(current_paragraph));
1522 }
1523
1524 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1525 let content_lines: Vec<String> = list_item_lines
1526 .iter()
1527 .filter_map(|line| {
1528 if let LineType::Content(s) = line {
1529 Some(s.clone())
1530 } else {
1531 None
1532 }
1533 })
1534 .collect();
1535
1536 // Check if we need to reflow this list item
1537 // We check the combined content to see if it exceeds length limits
1538 let combined_content = content_lines.join(" ").trim().to_string();
1539 let full_line = format!("{marker}{combined_content}");
1540
1541 // Helper to check if we should reflow in normalize mode
1542 let should_normalize = || {
1543 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1544 // DO normalize if it has plain text content that spans multiple lines
1545 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1546 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1547 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1548 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1549 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1550 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1551
1552 // If we have structural blocks but no paragraphs, don't normalize
1553 if (has_nested_lists
1554 || has_code_blocks
1555 || has_semantic_lines
1556 || has_snippet_lines
1557 || has_div_markers)
1558 && !has_paragraphs
1559 {
1560 return false;
1561 }
1562
1563 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1564 if has_paragraphs {
1565 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1566 if paragraph_count > 1 {
1567 // Multiple paragraph blocks should be normalized
1568 return true;
1569 }
1570
1571 // Single paragraph block: normalize if it has multiple content lines
1572 if content_lines.len() > 1 {
1573 return true;
1574 }
1575 }
1576
1577 false
1578 };
1579
1580 let needs_reflow = match config.reflow_mode {
1581 ReflowMode::Normalize => {
1582 // Only reflow if:
1583 // 1. The combined line would exceed the limit, OR
1584 // 2. The list item should be normalized (has multi-line plain text)
1585 let combined_length = self.calculate_effective_length(&full_line);
1586 if combined_length > config.line_length.get() {
1587 true
1588 } else {
1589 should_normalize()
1590 }
1591 }
1592 ReflowMode::SentencePerLine => {
1593 // Check if list item has multiple sentences
1594 let sentences = split_into_sentences(&combined_content);
1595 sentences.len() > 1
1596 }
1597 ReflowMode::SemanticLineBreaks => {
1598 let sentences = split_into_sentences(&combined_content);
1599 sentences.len() > 1
1600 || (list_start..i).any(|line_idx| {
1601 self.calculate_effective_length(lines[line_idx]) > config.line_length.get()
1602 })
1603 }
1604 ReflowMode::Default => {
1605 // In default mode, only reflow if any individual line exceeds limit
1606 (list_start..i)
1607 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1608 }
1609 };
1610
1611 if needs_reflow {
1612 let start_range = line_index.whole_line_range(list_start + 1);
1613 let end_line = i - 1;
1614 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1615 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1616 } else {
1617 line_index.whole_line_range(end_line + 1)
1618 };
1619 let byte_range = start_range.start..end_range.end;
1620
1621 // Reflow each block (paragraphs only, preserve code blocks)
1622 // When line_length = 0 (no limit), use a very large value for reflow
1623 let reflow_line_length = if config.line_length.is_unlimited() {
1624 usize::MAX
1625 } else {
1626 config.line_length.get().saturating_sub(indent_size).max(1)
1627 };
1628 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1629 line_length: reflow_line_length,
1630 break_on_sentences: true,
1631 preserve_breaks: false,
1632 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1633 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1634 abbreviations: config.abbreviations_for_reflow(),
1635 length_mode: self.reflow_length_mode(),
1636 };
1637
1638 let mut result: Vec<String> = Vec::new();
1639 let mut is_first_block = true;
1640
1641 for (block_idx, block) in blocks.iter().enumerate() {
1642 match block {
1643 Block::Paragraph(para_lines) => {
1644 // Split the paragraph into segments at hard break boundaries
1645 // Each segment can be reflowed independently
1646 let segments = split_into_segments(para_lines);
1647
1648 for (segment_idx, segment) in segments.iter().enumerate() {
1649 // Check if this segment ends with a hard break and what type
1650 let hard_break_type = segment.last().and_then(|line| {
1651 let line = line.strip_suffix('\r').unwrap_or(line);
1652 if line.ends_with('\\') {
1653 Some("\\")
1654 } else if line.ends_with(" ") {
1655 Some(" ")
1656 } else {
1657 None
1658 }
1659 });
1660
1661 // Join and reflow the segment (removing the hard break marker for processing)
1662 let segment_for_reflow: Vec<String> = segment
1663 .iter()
1664 .map(|line| {
1665 // Strip hard break marker (2 spaces or backslash) for reflow processing
1666 if line.ends_with('\\') {
1667 line[..line.len() - 1].trim_end().to_string()
1668 } else if line.ends_with(" ") {
1669 line[..line.len() - 2].trim_end().to_string()
1670 } else {
1671 line.clone()
1672 }
1673 })
1674 .collect();
1675
1676 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1677 if !segment_text.is_empty() {
1678 let reflowed =
1679 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1680
1681 if is_first_block && segment_idx == 0 {
1682 // First segment of first block starts with marker
1683 result.push(format!("{marker}{}", reflowed[0]));
1684 for line in reflowed.iter().skip(1) {
1685 result.push(format!("{expected_indent}{line}"));
1686 }
1687 is_first_block = false;
1688 } else {
1689 // Subsequent segments
1690 for line in reflowed {
1691 result.push(format!("{expected_indent}{line}"));
1692 }
1693 }
1694
1695 // If this segment had a hard break, add it back to the last line
1696 // Preserve the original hard break format (backslash or two spaces)
1697 if let Some(break_marker) = hard_break_type
1698 && let Some(last_line) = result.last_mut()
1699 {
1700 last_line.push_str(break_marker);
1701 }
1702 }
1703 }
1704
1705 // Add blank line after paragraph block if there's a next block.
1706 // Check if next block is a code block that doesn't want a preceding blank.
1707 // Also don't add blank lines before snippet lines (they should stay tight).
1708 // Only add if not already ending with one (avoids double blanks).
1709 if block_idx < blocks.len() - 1 {
1710 let next_block = &blocks[block_idx + 1];
1711 let should_add_blank = match next_block {
1712 Block::Code {
1713 has_preceding_blank, ..
1714 } => *has_preceding_blank,
1715 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1716 _ => true, // For all other blocks, add blank line
1717 };
1718 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1719 {
1720 result.push(String::new());
1721 }
1722 }
1723 }
1724 Block::Code {
1725 lines: code_lines,
1726 has_preceding_blank: _,
1727 } => {
1728 // Preserve code blocks as-is with original indentation
1729 // NOTE: Blank line before code block is handled by the previous block
1730 // (see paragraph block's logic above)
1731
1732 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1733 if is_first_block && idx == 0 {
1734 // First line of first block gets marker
1735 result.push(format!(
1736 "{marker}{}",
1737 " ".repeat(orig_indent - marker_len) + content
1738 ));
1739 is_first_block = false;
1740 } else if content.is_empty() {
1741 result.push(String::new());
1742 } else {
1743 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1744 }
1745 }
1746 }
1747 Block::NestedList(nested_items) => {
1748 // Preserve nested list items as-is with original indentation.
1749 // Only add blank before if not already ending with one (avoids
1750 // double blanks when the preceding block already added one).
1751 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1752 result.push(String::new());
1753 }
1754
1755 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1756 if is_first_block && idx == 0 {
1757 // First line of first block gets marker
1758 result.push(format!(
1759 "{marker}{}",
1760 " ".repeat(orig_indent - marker_len) + content
1761 ));
1762 is_first_block = false;
1763 } else if content.is_empty() {
1764 result.push(String::new());
1765 } else {
1766 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1767 }
1768 }
1769
1770 // Add blank line after nested list if there's a next block.
1771 // Only add if not already ending with one (avoids double blanks
1772 // when the last nested item was already a blank line).
1773 if block_idx < blocks.len() - 1 {
1774 let next_block = &blocks[block_idx + 1];
1775 let should_add_blank = match next_block {
1776 Block::Code {
1777 has_preceding_blank, ..
1778 } => *has_preceding_blank,
1779 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1780 _ => true, // For all other blocks, add blank line
1781 };
1782 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1783 {
1784 result.push(String::new());
1785 }
1786 }
1787 }
1788 Block::SemanticLine(content) => {
1789 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
1790 // Only add blank before if not already ending with one.
1791 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1792 result.push(String::new());
1793 }
1794
1795 if is_first_block {
1796 // First block starts with marker
1797 result.push(format!("{marker}{content}"));
1798 is_first_block = false;
1799 } else {
1800 // Subsequent blocks use expected indent
1801 result.push(format!("{expected_indent}{content}"));
1802 }
1803
1804 // Add blank line after semantic line if there's a next block.
1805 // Only add if not already ending with one.
1806 if block_idx < blocks.len() - 1 {
1807 let next_block = &blocks[block_idx + 1];
1808 let should_add_blank = match next_block {
1809 Block::Code {
1810 has_preceding_blank, ..
1811 } => *has_preceding_blank,
1812 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1813 _ => true, // For all other blocks, add blank line
1814 };
1815 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1816 {
1817 result.push(String::new());
1818 }
1819 }
1820 }
1821 Block::SnippetLine(content) => {
1822 // Preserve snippet delimiters (-8<-) as-is on their own line
1823 // Unlike semantic lines, snippet lines don't add extra blank lines
1824 if is_first_block {
1825 // First block starts with marker
1826 result.push(format!("{marker}{content}"));
1827 is_first_block = false;
1828 } else {
1829 // Subsequent blocks use expected indent
1830 result.push(format!("{expected_indent}{content}"));
1831 }
1832 // No blank lines added before or after snippet delimiters
1833 }
1834 Block::DivMarker(content) => {
1835 // Preserve div markers (::: opening or closing) as-is on their own line
1836 if is_first_block {
1837 result.push(format!("{marker}{content}"));
1838 is_first_block = false;
1839 } else {
1840 result.push(format!("{expected_indent}{content}"));
1841 }
1842 }
1843 Block::Html {
1844 lines: html_lines,
1845 has_preceding_blank: _,
1846 } => {
1847 // Preserve HTML blocks exactly as-is with original indentation
1848 // NOTE: Blank line before HTML block is handled by the previous block
1849
1850 for (idx, line) in html_lines.iter().enumerate() {
1851 if is_first_block && idx == 0 {
1852 // First line of first block gets marker
1853 result.push(format!("{marker}{line}"));
1854 is_first_block = false;
1855 } else if line.is_empty() {
1856 // Preserve blank lines inside HTML blocks
1857 result.push(String::new());
1858 } else {
1859 // Preserve lines with their original content (already includes indentation)
1860 result.push(format!("{expected_indent}{line}"));
1861 }
1862 }
1863
1864 // Add blank line after HTML block if there's a next block.
1865 // Only add if not already ending with one (avoids double blanks
1866 // when the HTML block itself contained a trailing blank line).
1867 if block_idx < blocks.len() - 1 {
1868 let next_block = &blocks[block_idx + 1];
1869 let should_add_blank = match next_block {
1870 Block::Code {
1871 has_preceding_blank, ..
1872 } => *has_preceding_blank,
1873 Block::Html {
1874 has_preceding_blank, ..
1875 } => *has_preceding_blank,
1876 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1877 _ => true, // For all other blocks, add blank line
1878 };
1879 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1880 {
1881 result.push(String::new());
1882 }
1883 }
1884 }
1885 }
1886 }
1887
1888 let reflowed_text = result.join(line_ending);
1889
1890 // Preserve trailing newline
1891 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1892 format!("{reflowed_text}{line_ending}")
1893 } else {
1894 reflowed_text
1895 };
1896
1897 // Get the original text to compare
1898 let original_text = &ctx.content[byte_range.clone()];
1899
1900 // Only generate a warning if the replacement is different from the original
1901 if original_text != replacement {
1902 // Generate an appropriate message based on why reflow is needed
1903 let message = match config.reflow_mode {
1904 ReflowMode::SentencePerLine => {
1905 let num_sentences = split_into_sentences(&combined_content).len();
1906 let num_lines = content_lines.len();
1907 if num_lines == 1 {
1908 // Single line with multiple sentences
1909 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1910 } else {
1911 // Multiple lines - could be split sentences or mixed
1912 format!(
1913 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1914 )
1915 }
1916 }
1917 ReflowMode::SemanticLineBreaks => {
1918 let num_sentences = split_into_sentences(&combined_content).len();
1919 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
1920 }
1921 ReflowMode::Normalize => {
1922 let combined_length = self.calculate_effective_length(&full_line);
1923 if combined_length > config.line_length.get() {
1924 format!(
1925 "Line length {} exceeds {} characters",
1926 combined_length,
1927 config.line_length.get()
1928 )
1929 } else {
1930 "Multi-line content can be normalized".to_string()
1931 }
1932 }
1933 ReflowMode::Default => {
1934 let combined_length = self.calculate_effective_length(&full_line);
1935 format!(
1936 "Line length {} exceeds {} characters",
1937 combined_length,
1938 config.line_length.get()
1939 )
1940 }
1941 };
1942
1943 warnings.push(LintWarning {
1944 rule_name: Some(self.name().to_string()),
1945 message,
1946 line: list_start + 1,
1947 column: 1,
1948 end_line: end_line + 1,
1949 end_column: lines[end_line].len() + 1,
1950 severity: Severity::Warning,
1951 fix: Some(crate::rule::Fix {
1952 range: byte_range,
1953 replacement,
1954 }),
1955 });
1956 }
1957 }
1958 continue;
1959 }
1960
1961 // Found start of a paragraph - collect all lines in it
1962 let paragraph_start = i;
1963 let mut paragraph_lines = vec![lines[i]];
1964 i += 1;
1965
1966 while i < lines.len() {
1967 let next_line = lines[i];
1968 let next_line_num = i + 1;
1969 let next_trimmed = next_line.trim();
1970
1971 // Stop at paragraph boundaries
1972 if next_trimmed.is_empty()
1973 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1974 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1975 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1976 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1977 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1978 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1979 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1980 || ctx
1981 .line_info(next_line_num)
1982 .is_some_and(|info| info.in_mkdocs_container())
1983 || (next_line_num > 0
1984 && next_line_num <= ctx.lines.len()
1985 && ctx.lines[next_line_num - 1].blockquote.is_some())
1986 || next_trimmed.starts_with('#')
1987 || TableUtils::is_potential_table_row(next_line)
1988 || is_list_item(next_trimmed)
1989 || is_horizontal_rule(next_trimmed)
1990 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1991 || is_template_directive_only(next_line)
1992 || is_standalone_attr_list(next_line)
1993 || is_snippet_block_delimiter(next_line)
1994 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
1995 {
1996 break;
1997 }
1998
1999 // Check if the previous line ends with a hard break (2+ spaces or backslash)
2000 if i > 0 && has_hard_break(lines[i - 1]) {
2001 // Don't include lines after hard breaks in the same paragraph
2002 break;
2003 }
2004
2005 paragraph_lines.push(next_line);
2006 i += 1;
2007 }
2008
2009 // Combine paragraph lines into a single string for processing
2010 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
2011 let paragraph_text = paragraph_lines.join(" ");
2012
2013 // Skip reflowing if this paragraph contains definition list items
2014 // Definition lists are multi-line structures that should not be joined
2015 let contains_definition_list = paragraph_lines
2016 .iter()
2017 .any(|line| crate::utils::is_definition_list_item(line));
2018
2019 if contains_definition_list {
2020 // Don't reflow definition lists - skip this paragraph
2021 i = paragraph_start + paragraph_lines.len();
2022 continue;
2023 }
2024
2025 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2026 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2027 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2028
2029 if contains_snippets {
2030 // Don't reflow Snippets blocks - skip this paragraph
2031 i = paragraph_start + paragraph_lines.len();
2032 continue;
2033 }
2034
2035 // Check if this paragraph needs reflowing
2036 let needs_reflow = match config.reflow_mode {
2037 ReflowMode::Normalize => {
2038 // In normalize mode, reflow multi-line paragraphs
2039 paragraph_lines.len() > 1
2040 }
2041 ReflowMode::SentencePerLine => {
2042 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
2043 // Note: we check the joined text because sentences can span multiple lines
2044 let sentences = split_into_sentences(¶graph_text);
2045
2046 // Always reflow if multiple sentences on one line
2047 if sentences.len() > 1 {
2048 true
2049 } else if paragraph_lines.len() > 1 {
2050 // For single-sentence paragraphs spanning multiple lines:
2051 // Reflow if they COULD fit on one line (respecting line-length constraint)
2052 if config.line_length.is_unlimited() {
2053 // No line-length constraint - always join single sentences
2054 true
2055 } else {
2056 // Only join if it fits within line-length
2057 let effective_length = self.calculate_effective_length(¶graph_text);
2058 effective_length <= config.line_length.get()
2059 }
2060 } else {
2061 false
2062 }
2063 }
2064 ReflowMode::SemanticLineBreaks => {
2065 let sentences = split_into_sentences(¶graph_text);
2066 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
2067 sentences.len() > 1
2068 || paragraph_lines.len() > 1
2069 || paragraph_lines
2070 .iter()
2071 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2072 }
2073 ReflowMode::Default => {
2074 // In default mode, only reflow if lines exceed limit
2075 paragraph_lines
2076 .iter()
2077 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2078 }
2079 };
2080
2081 if needs_reflow {
2082 // Calculate byte range for this paragraph
2083 // Use whole_line_range for each line and combine
2084 let start_range = line_index.whole_line_range(paragraph_start + 1);
2085 let end_line = paragraph_start + paragraph_lines.len() - 1;
2086
2087 // For the last line, we want to preserve any trailing newline
2088 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2089 // Last line without trailing newline - use line_text_range
2090 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2091 } else {
2092 // Not the last line or has trailing newline - use whole_line_range
2093 line_index.whole_line_range(end_line + 1)
2094 };
2095
2096 let byte_range = start_range.start..end_range.end;
2097
2098 // Check if the paragraph ends with a hard break and what type
2099 let hard_break_type = paragraph_lines.last().and_then(|line| {
2100 let line = line.strip_suffix('\r').unwrap_or(line);
2101 if line.ends_with('\\') {
2102 Some("\\")
2103 } else if line.ends_with(" ") {
2104 Some(" ")
2105 } else {
2106 None
2107 }
2108 });
2109
2110 // Reflow the paragraph
2111 // When line_length = 0 (no limit), use a very large value for reflow
2112 let reflow_line_length = if config.line_length.is_unlimited() {
2113 usize::MAX
2114 } else {
2115 config.line_length.get()
2116 };
2117 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2118 line_length: reflow_line_length,
2119 break_on_sentences: true,
2120 preserve_breaks: false,
2121 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2122 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2123 abbreviations: config.abbreviations_for_reflow(),
2124 length_mode: self.reflow_length_mode(),
2125 };
2126 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
2127
2128 // If the original paragraph ended with a hard break, preserve it
2129 // Preserve the original hard break format (backslash or two spaces)
2130 if let Some(break_marker) = hard_break_type
2131 && !reflowed.is_empty()
2132 {
2133 let last_idx = reflowed.len() - 1;
2134 if !has_hard_break(&reflowed[last_idx]) {
2135 reflowed[last_idx].push_str(break_marker);
2136 }
2137 }
2138
2139 let reflowed_text = reflowed.join(line_ending);
2140
2141 // Preserve trailing newline if the original paragraph had one
2142 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2143 format!("{reflowed_text}{line_ending}")
2144 } else {
2145 reflowed_text
2146 };
2147
2148 // Get the original text to compare
2149 let original_text = &ctx.content[byte_range.clone()];
2150
2151 // Only generate a warning if the replacement is different from the original
2152 if original_text != replacement {
2153 // Create warning with actual fix
2154 // In default mode, report the specific line that violates
2155 // In normalize mode, report the whole paragraph
2156 // In sentence-per-line mode, report the entire paragraph
2157 let (warning_line, warning_end_line) = match config.reflow_mode {
2158 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
2159 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
2160 // Highlight the entire paragraph that needs reformatting
2161 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
2162 }
2163 ReflowMode::Default => {
2164 // Find the first line that exceeds the limit
2165 let mut violating_line = paragraph_start;
2166 for (idx, line) in paragraph_lines.iter().enumerate() {
2167 if self.calculate_effective_length(line) > config.line_length.get() {
2168 violating_line = paragraph_start + idx;
2169 break;
2170 }
2171 }
2172 (violating_line + 1, violating_line + 1)
2173 }
2174 };
2175
2176 warnings.push(LintWarning {
2177 rule_name: Some(self.name().to_string()),
2178 message: match config.reflow_mode {
2179 ReflowMode::Normalize => format!(
2180 "Paragraph could be normalized to use line length of {} characters",
2181 config.line_length.get()
2182 ),
2183 ReflowMode::SentencePerLine => {
2184 let num_sentences = split_into_sentences(¶graph_text).len();
2185 if paragraph_lines.len() == 1 {
2186 // Single line with multiple sentences
2187 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2188 } else {
2189 let num_lines = paragraph_lines.len();
2190 // Multiple lines - could be split sentences or mixed
2191 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
2192 }
2193 },
2194 ReflowMode::SemanticLineBreaks => {
2195 let num_sentences = split_into_sentences(¶graph_text).len();
2196 format!(
2197 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
2198 )
2199 },
2200 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
2201 },
2202 line: warning_line,
2203 column: 1,
2204 end_line: warning_end_line,
2205 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
2206 severity: Severity::Warning,
2207 fix: Some(crate::rule::Fix {
2208 range: byte_range,
2209 replacement,
2210 }),
2211 });
2212 }
2213 }
2214 }
2215
2216 warnings
2217 }
2218
2219 /// Calculate string length based on the configured length mode
2220 fn calculate_string_length(&self, s: &str) -> usize {
2221 match self.config.length_mode {
2222 LengthMode::Chars => s.chars().count(),
2223 LengthMode::Visual => s.width(),
2224 LengthMode::Bytes => s.len(),
2225 }
2226 }
2227
2228 /// Calculate effective line length
2229 ///
2230 /// Returns the actual display length of the line using the configured length mode.
2231 fn calculate_effective_length(&self, line: &str) -> usize {
2232 self.calculate_string_length(line)
2233 }
2234
2235 /// Calculate line length with inline link/image URLs removed.
2236 ///
2237 /// For each inline link `[text](url)` or image `` on the line,
2238 /// computes the "savings" from removing the URL portion (keeping only `[text]`
2239 /// or `![alt]`). Returns `effective_length - total_savings`.
2240 ///
2241 /// Handles nested constructs (e.g., `[](url)`) by only counting the
2242 /// outermost construct to avoid double-counting.
2243 fn calculate_text_only_length(
2244 &self,
2245 effective_length: usize,
2246 line_number: usize,
2247 ctx: &crate::lint_context::LintContext,
2248 ) -> usize {
2249 let line_range = ctx.line_index.line_content_range(line_number);
2250 let line_byte_end = line_range.end;
2251
2252 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
2253 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
2254
2255 for link in &ctx.links {
2256 if link.line != line_number || link.is_reference {
2257 continue;
2258 }
2259 if !matches!(link.link_type, LinkType::Inline) {
2260 continue;
2261 }
2262 // Skip cross-line links
2263 if link.byte_end > line_byte_end {
2264 continue;
2265 }
2266 // `[text]` in configured length mode
2267 let text_only_len = 2 + self.calculate_string_length(&link.text);
2268 constructs.push((link.byte_offset, link.byte_end, text_only_len));
2269 }
2270
2271 for image in &ctx.images {
2272 if image.line != line_number || image.is_reference {
2273 continue;
2274 }
2275 if !matches!(image.link_type, LinkType::Inline) {
2276 continue;
2277 }
2278 // Skip cross-line images
2279 if image.byte_end > line_byte_end {
2280 continue;
2281 }
2282 // `![alt]` in configured length mode
2283 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
2284 constructs.push((image.byte_offset, image.byte_end, text_only_len));
2285 }
2286
2287 if constructs.is_empty() {
2288 return effective_length;
2289 }
2290
2291 // Sort by byte offset to handle overlapping/nested constructs
2292 constructs.sort_by_key(|&(start, _, _)| start);
2293
2294 let mut total_savings: usize = 0;
2295 let mut last_end: usize = 0;
2296
2297 for (start, end, text_only_len) in &constructs {
2298 // Skip constructs nested inside a previously counted one
2299 if *start < last_end {
2300 continue;
2301 }
2302 // Full construct length in configured length mode
2303 let full_source = &ctx.content[*start..*end];
2304 let full_len = self.calculate_string_length(full_source);
2305 total_savings += full_len.saturating_sub(*text_only_len);
2306 last_end = *end;
2307 }
2308
2309 effective_length.saturating_sub(total_savings)
2310 }
2311}