rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod helpers;
22pub mod md013_config;
23use crate::utils::is_template_directive_only;
24use helpers::{
25 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_list_item,
26 is_standalone_link_or_image_line, split_into_segments, trim_preserving_hard_break,
27};
28pub use md013_config::MD013Config;
29use md013_config::{LengthMode, ReflowMode};
30
31#[cfg(test)]
32mod tests;
33use unicode_width::UnicodeWidthStr;
34
/// Lint rule MD013: reports lines whose length exceeds the configured limit.
///
/// All behavior is driven by the wrapped [`MD013Config`].
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Rule configuration (visible within the crate).
    pub(crate) config: MD013Config,
}
39
/// Blockquote paragraph line collected for reflow, with original line index for range computation.
struct CollectedBlockquoteLine {
    /// Zero-based index of the line within the raw line slice it was collected from.
    line_idx: usize,
    /// Content/prefix data for this line, later handed to the blockquote reflow helpers.
    data: BlockquoteLineData,
}
45
46impl MD013LineLength {
47 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
48 Self {
49 config: MD013Config {
50 line_length: crate::types::LineLength::new(line_length),
51 code_blocks,
52 tables,
53 headings,
54 paragraphs: true, // Default to true for backwards compatibility
55 strict,
56 reflow: false,
57 reflow_mode: ReflowMode::default(),
58 length_mode: LengthMode::default(),
59 abbreviations: Vec::new(),
60 },
61 }
62 }
63
64 pub fn from_config_struct(config: MD013Config) -> Self {
65 Self { config }
66 }
67
68 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
69 fn reflow_length_mode(&self) -> ReflowLengthMode {
70 match self.config.length_mode {
71 LengthMode::Chars => ReflowLengthMode::Chars,
72 LengthMode::Visual => ReflowLengthMode::Visual,
73 LengthMode::Bytes => ReflowLengthMode::Bytes,
74 }
75 }
76
77 fn should_ignore_line(
78 &self,
79 line: &str,
80 _lines: &[&str],
81 current_line: usize,
82 ctx: &crate::lint_context::LintContext,
83 ) -> bool {
84 if self.config.strict {
85 return false;
86 }
87
88 // Quick check for common patterns before expensive regex
89 let trimmed = line.trim();
90
91 // Only skip if the entire line is a URL (quick check first)
92 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
93 return true;
94 }
95
96 // Only skip if the entire line is an image reference (quick check first)
97 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
98 return true;
99 }
100
101 // Note: link reference definitions are handled as always-exempt (even in strict mode)
102 // in the main check loop, so they don't need to be checked here.
103
104 // Code blocks with long strings (only check if in code block)
105 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
106 && !trimmed.is_empty()
107 && !line.contains(' ')
108 && !line.contains('\t')
109 {
110 return true;
111 }
112
113 false
114 }
115
116 /// Check if rule should skip based on provided config (used for inline config support)
117 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
118 // Skip if content is empty
119 if ctx.content.is_empty() {
120 return true;
121 }
122
123 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
124 if config.reflow
125 && (config.reflow_mode == ReflowMode::SentencePerLine
126 || config.reflow_mode == ReflowMode::SemanticLineBreaks
127 || config.reflow_mode == ReflowMode::Normalize)
128 {
129 return false;
130 }
131
132 // Quick check: if total content is shorter than line limit, definitely skip
133 if ctx.content.len() <= config.line_length.get() {
134 return true;
135 }
136
137 // Skip if no line exceeds the limit
138 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
139 }
140}
141
142impl Rule for MD013LineLength {
    /// Identifier of this rule (`"MD013"`).
    fn name(&self) -> &'static str {
        "MD013"
    }
146
    /// One-line, human-readable summary of what this rule enforces.
    fn description(&self) -> &'static str {
        "Line length should not be excessive"
    }
150
151 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
152 // Use pre-parsed inline config from LintContext
153 let config_override = ctx.inline_config().get_rule_config("MD013");
154
155 // Apply configuration override if present
156 let effective_config = if let Some(json_config) = config_override {
157 if let Some(obj) = json_config.as_object() {
158 let mut config = self.config.clone();
159 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
160 config.line_length = crate::types::LineLength::new(line_length as usize);
161 }
162 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
163 config.code_blocks = code_blocks;
164 }
165 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
166 config.tables = tables;
167 }
168 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
169 config.headings = headings;
170 }
171 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
172 config.strict = strict;
173 }
174 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
175 config.reflow = reflow;
176 }
177 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
178 config.reflow_mode = match reflow_mode {
179 "default" => ReflowMode::Default,
180 "normalize" => ReflowMode::Normalize,
181 "sentence-per-line" => ReflowMode::SentencePerLine,
182 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
183 _ => ReflowMode::default(),
184 };
185 }
186 config
187 } else {
188 self.config.clone()
189 }
190 } else {
191 self.config.clone()
192 };
193
194 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
195 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
196 if self.should_skip_with_config(ctx, &effective_config)
197 && !(effective_config.reflow
198 && (effective_config.reflow_mode == ReflowMode::Normalize
199 || effective_config.reflow_mode == ReflowMode::SentencePerLine
200 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
201 {
202 return Ok(Vec::new());
203 }
204
205 // Direct implementation without DocumentStructure
206 let mut warnings = Vec::new();
207
208 // Special handling: line_length = 0 means "no line length limit"
209 // Skip all line length checks, but still allow reflow if enabled
210 let skip_length_checks = effective_config.line_length.is_unlimited();
211
212 // Pre-filter lines that could be problematic to avoid processing all lines
213 let mut candidate_lines = Vec::new();
214 if !skip_length_checks {
215 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
216 // Skip front matter - it should never be linted
217 if line_info.in_front_matter {
218 continue;
219 }
220
221 // Quick length check first
222 if line_info.byte_len > effective_config.line_length.get() {
223 candidate_lines.push(line_idx);
224 }
225 }
226 }
227
228 // If no candidate lines and not in normalize or sentence-per-line mode, early return
229 if candidate_lines.is_empty()
230 && !(effective_config.reflow
231 && (effective_config.reflow_mode == ReflowMode::Normalize
232 || effective_config.reflow_mode == ReflowMode::SentencePerLine
233 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
234 {
235 return Ok(warnings);
236 }
237
238 let lines = ctx.raw_lines();
239
240 // Create a quick lookup set for heading lines
241 // We need this for both the heading skip check AND the paragraphs check
242 let heading_lines_set: std::collections::HashSet<usize> = ctx
243 .lines
244 .iter()
245 .enumerate()
246 .filter(|(_, line)| line.heading.is_some())
247 .map(|(idx, _)| idx + 1)
248 .collect();
249
250 // Use pre-computed table blocks from context
251 // We need this for both the table skip check AND the paragraphs check
252 let table_blocks = &ctx.table_blocks;
253 let mut table_lines_set = std::collections::HashSet::new();
254 for table in table_blocks {
255 table_lines_set.insert(table.header_line + 1);
256 table_lines_set.insert(table.delimiter_line + 1);
257 for &line in &table.content_lines {
258 table_lines_set.insert(line + 1);
259 }
260 }
261
262 // Process candidate lines for line length checks
263 for &line_idx in &candidate_lines {
264 let line_number = line_idx + 1;
265 let line = lines[line_idx];
266
267 // Calculate actual line length (used in warning messages)
268 let effective_length = self.calculate_effective_length(line);
269
270 // Use single line length limit for all content
271 let line_limit = effective_config.line_length.get();
272
273 // In non-strict mode, forgive the trailing non-whitespace run.
274 // If the line only exceeds the limit because of a long token at the end
275 // (URL, link chain, identifier), it passes. This matches markdownlint's
276 // behavior: line.replace(/\S*$/u, "#")
277 let check_length = if effective_config.strict {
278 effective_length
279 } else {
280 match line.rfind(char::is_whitespace) {
281 Some(pos) => {
282 let ws_char = line[pos..].chars().next().unwrap();
283 let prefix_end = pos + ws_char.len_utf8();
284 self.calculate_string_length(&line[..prefix_end]) + 1
285 }
286 None => 1, // No whitespace — entire line is a single token
287 }
288 };
289
290 // Skip lines where the check length is within the limit
291 if check_length <= line_limit {
292 continue;
293 }
294
295 // Semantic link understanding: suppress when excess comes entirely from inline URLs
296 if !effective_config.strict {
297 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
298 if text_only_length <= line_limit {
299 continue;
300 }
301 }
302
303 // Skip mkdocstrings blocks (already handled by LintContext)
304 if ctx.lines[line_idx].in_mkdocstrings {
305 continue;
306 }
307
308 // Link reference definitions are always exempt, even in strict mode.
309 // There's no way to shorten them without breaking the URL.
310 // Also check after stripping list markers, since list items may
311 // contain link ref defs as their content.
312 {
313 let trimmed = line.trim();
314 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
315 continue;
316 }
317 if is_list_item(trimmed) {
318 let (_, content) = extract_list_marker_and_content(trimmed);
319 let content_trimmed = content.trim();
320 if content_trimmed.starts_with('[')
321 && content_trimmed.contains("]:")
322 && LINK_REF_PATTERN.is_match(content_trimmed)
323 {
324 continue;
325 }
326 }
327 }
328
329 // Skip various block types efficiently
330 if !effective_config.strict {
331 // Lines whose only content is a link/image are exempt.
332 // After stripping list markers, blockquote markers, and emphasis,
333 // if only a link or image remains, there is no way to shorten it.
334 if is_standalone_link_or_image_line(line) {
335 continue;
336 }
337
338 // Skip setext heading underlines
339 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
340 continue;
341 }
342
343 // Skip block elements according to config flags
344 // The flags mean: true = check these elements, false = skip these elements
345 // So we skip when the flag is FALSE and the line is in that element type
346 if (!effective_config.headings && heading_lines_set.contains(&line_number))
347 || (!effective_config.code_blocks
348 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
349 || (!effective_config.tables && table_lines_set.contains(&line_number))
350 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
351 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
352 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
353 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
354 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
355 {
356 continue;
357 }
358
359 // Check if this is a paragraph/regular text line
360 // If paragraphs = false, skip lines that are NOT in special blocks
361 if !effective_config.paragraphs {
362 let is_special_block = heading_lines_set.contains(&line_number)
363 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
364 || table_lines_set.contains(&line_number)
365 || ctx.lines[line_number - 1].blockquote.is_some()
366 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
367 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
368 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
369 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
370 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
371 || ctx
372 .line_info(line_number)
373 .is_some_and(|info| info.in_mkdocs_container());
374
375 // Skip regular paragraph text when paragraphs = false
376 if !is_special_block {
377 continue;
378 }
379 }
380
381 // Skip lines that are only a URL, image ref, or link ref
382 if self.should_ignore_line(line, lines, line_idx, ctx) {
383 continue;
384 }
385 }
386
387 // In sentence-per-line mode, check if this is a single long sentence
388 // If so, emit a warning without a fix (user must manually rephrase)
389 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
390 let sentences = split_into_sentences(line.trim());
391 if sentences.len() == 1 {
392 // Single sentence that's too long - warn but don't auto-fix
393 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
394
395 let (start_line, start_col, end_line, end_col) =
396 calculate_excess_range(line_number, line, line_limit);
397
398 warnings.push(LintWarning {
399 rule_name: Some(self.name().to_string()),
400 message,
401 line: start_line,
402 column: start_col,
403 end_line,
404 end_column: end_col,
405 severity: Severity::Warning,
406 fix: None, // No auto-fix for long single sentences
407 });
408 continue;
409 }
410 // Multiple sentences will be handled by paragraph-based reflow
411 continue;
412 }
413
414 // In semantic-line-breaks mode, skip per-line checks —
415 // all reflow is handled at the paragraph level with cascading splits
416 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
417 continue;
418 }
419
420 // Don't provide fix for individual lines when reflow is enabled
421 // Paragraph-based fixes will be handled separately
422 let fix = None;
423
424 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
425
426 // Calculate precise character range for the excess portion
427 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
428
429 warnings.push(LintWarning {
430 rule_name: Some(self.name().to_string()),
431 message,
432 line: start_line,
433 column: start_col,
434 end_line,
435 end_column: end_col,
436 severity: Severity::Warning,
437 fix,
438 });
439 }
440
441 // If reflow is enabled, generate paragraph-based fixes
442 if effective_config.reflow {
443 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
444 // Merge paragraph warnings with line warnings, removing duplicates
445 for pw in paragraph_warnings {
446 // Remove any line warnings that overlap with this paragraph
447 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
448 warnings.push(pw);
449 }
450 }
451
452 Ok(warnings)
453 }
454
455 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
456 // For CLI usage, apply fixes from warnings
457 // LSP will use the warning-based fixes directly
458 let warnings = self.check(ctx)?;
459
460 // If there are no fixes, return content unchanged
461 if !warnings.iter().any(|w| w.fix.is_some()) {
462 return Ok(ctx.content.to_string());
463 }
464
465 // Apply warning-based fixes
466 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
467 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
468 }
469
    /// Expose this rule as `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
473
    /// This rule is reported under `RuleCategory::Whitespace`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
477
    /// Skip decision based on the rule's own configuration; delegates to
    /// `should_skip_with_config` (inline overrides are not considered here).
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        self.should_skip_with_config(ctx, &self.config)
    }
481
482 fn default_config_section(&self) -> Option<(String, toml::Value)> {
483 let default_config = MD013Config::default();
484 let json_value = serde_json::to_value(&default_config).ok()?;
485 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
486
487 if let toml::Value::Table(table) = toml_value {
488 if !table.is_empty() {
489 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
490 } else {
491 None
492 }
493 } else {
494 None
495 }
496 }
497
498 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
499 let mut aliases = std::collections::HashMap::new();
500 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
501 Some(aliases)
502 }
503
504 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
505 where
506 Self: Sized,
507 {
508 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
509 // Use global line_length if rule-specific config still has default value
510 if rule_config.line_length.get() == 80 {
511 rule_config.line_length = config.global.line_length;
512 }
513 Box::new(Self::from_config_struct(rule_config))
514 }
515}
516
517impl MD013LineLength {
518 fn is_blockquote_content_boundary(
519 &self,
520 content: &str,
521 line_num: usize,
522 ctx: &crate::lint_context::LintContext,
523 ) -> bool {
524 let trimmed = content.trim();
525
526 trimmed.is_empty()
527 || ctx.line_info(line_num).is_some_and(|info| {
528 info.in_code_block
529 || info.in_front_matter
530 || info.in_html_block
531 || info.in_html_comment
532 || info.in_esm_block
533 || info.in_jsx_expression
534 || info.in_mdx_comment
535 || info.in_mkdocstrings
536 || info.in_mkdocs_container()
537 || info.is_div_marker
538 })
539 || trimmed.starts_with('#')
540 || trimmed.starts_with("```")
541 || trimmed.starts_with("~~~")
542 || trimmed.starts_with('>')
543 || TableUtils::is_potential_table_row(content)
544 || is_list_item(trimmed)
545 || is_horizontal_rule(trimmed)
546 || (trimmed.starts_with('[') && content.contains("]:"))
547 || is_template_directive_only(content)
548 || is_standalone_attr_list(content)
549 || is_snippet_block_delimiter(content)
550 || is_github_alert_marker(trimmed)
551 }
552
553 fn generate_blockquote_paragraph_fix(
554 &self,
555 ctx: &crate::lint_context::LintContext,
556 config: &MD013Config,
557 lines: &[&str],
558 line_index: &LineIndex,
559 start_idx: usize,
560 line_ending: &str,
561 ) -> (Option<LintWarning>, usize) {
562 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
563 return (None, start_idx + 1);
564 };
565 let target_level = start_bq.nesting_level;
566
567 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
568 let mut i = start_idx;
569
570 while i < lines.len() {
571 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
572 break;
573 }
574
575 let line_num = i + 1;
576 if line_num > ctx.lines.len() {
577 break;
578 }
579
580 if lines[i].trim().is_empty() {
581 break;
582 }
583
584 let line_bq = ctx.lines[i].blockquote.as_deref();
585 if let Some(bq) = line_bq {
586 if bq.nesting_level != target_level {
587 break;
588 }
589
590 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
591 break;
592 }
593
594 collected.push(CollectedBlockquoteLine {
595 line_idx: i,
596 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
597 });
598 i += 1;
599 continue;
600 }
601
602 let lazy_content = lines[i].trim_start();
603 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
604 break;
605 }
606
607 collected.push(CollectedBlockquoteLine {
608 line_idx: i,
609 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
610 });
611 i += 1;
612 }
613
614 if collected.is_empty() {
615 return (None, start_idx + 1);
616 }
617
618 let next_idx = i;
619 let paragraph_start = collected[0].line_idx;
620 let end_line = collected[collected.len() - 1].line_idx;
621 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
622 let paragraph_text = line_data
623 .iter()
624 .map(|d| d.content.as_str())
625 .collect::<Vec<_>>()
626 .join(" ");
627
628 let contains_definition_list = line_data
629 .iter()
630 .any(|d| crate::utils::is_definition_list_item(&d.content));
631 if contains_definition_list {
632 return (None, next_idx);
633 }
634
635 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
636 if contains_snippets {
637 return (None, next_idx);
638 }
639
640 let needs_reflow = match config.reflow_mode {
641 ReflowMode::Normalize => line_data.len() > 1,
642 ReflowMode::SentencePerLine => {
643 let sentences = split_into_sentences(¶graph_text);
644 sentences.len() > 1 || line_data.len() > 1
645 }
646 ReflowMode::SemanticLineBreaks => {
647 let sentences = split_into_sentences(¶graph_text);
648 sentences.len() > 1
649 || line_data.len() > 1
650 || collected
651 .iter()
652 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
653 }
654 ReflowMode::Default => collected
655 .iter()
656 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
657 };
658
659 if !needs_reflow {
660 return (None, next_idx);
661 }
662
663 let fallback_prefix = start_bq.prefix.clone();
664 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
665 let continuation_style = blockquote_continuation_style(&line_data);
666
667 let reflow_line_length = if config.line_length.is_unlimited() {
668 usize::MAX
669 } else {
670 config
671 .line_length
672 .get()
673 .saturating_sub(self.calculate_string_length(&explicit_prefix))
674 .max(1)
675 };
676
677 let reflow_options = crate::utils::text_reflow::ReflowOptions {
678 line_length: reflow_line_length,
679 break_on_sentences: true,
680 preserve_breaks: false,
681 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
682 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
683 abbreviations: config.abbreviations_for_reflow(),
684 length_mode: self.reflow_length_mode(),
685 };
686
687 let reflowed_with_style =
688 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
689
690 if reflowed_with_style.is_empty() {
691 return (None, next_idx);
692 }
693
694 let reflowed_text = reflowed_with_style.join(line_ending);
695
696 let start_range = line_index.whole_line_range(paragraph_start + 1);
697 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
698 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
699 } else {
700 line_index.whole_line_range(end_line + 1)
701 };
702 let byte_range = start_range.start..end_range.end;
703
704 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
705 format!("{reflowed_text}{line_ending}")
706 } else {
707 reflowed_text
708 };
709
710 let original_text = &ctx.content[byte_range.clone()];
711 if original_text == replacement {
712 return (None, next_idx);
713 }
714
715 let (warning_line, warning_end_line) = match config.reflow_mode {
716 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
717 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
718 ReflowMode::Default => {
719 let violating_line = collected
720 .iter()
721 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
722 .map(|line| line.line_idx + 1)
723 .unwrap_or(paragraph_start + 1);
724 (violating_line, violating_line)
725 }
726 };
727
728 let warning = LintWarning {
729 rule_name: Some(self.name().to_string()),
730 message: match config.reflow_mode {
731 ReflowMode::Normalize => format!(
732 "Paragraph could be normalized to use line length of {} characters",
733 config.line_length.get()
734 ),
735 ReflowMode::SentencePerLine => {
736 let num_sentences = split_into_sentences(¶graph_text).len();
737 if line_data.len() == 1 {
738 format!("Line contains {num_sentences} sentences (one sentence per line required)")
739 } else {
740 let num_lines = line_data.len();
741 format!(
742 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
743 )
744 }
745 }
746 ReflowMode::SemanticLineBreaks => {
747 let num_sentences = split_into_sentences(¶graph_text).len();
748 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
749 }
750 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
751 },
752 line: warning_line,
753 column: 1,
754 end_line: warning_end_line,
755 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
756 severity: Severity::Warning,
757 fix: Some(crate::rule::Fix {
758 range: byte_range,
759 replacement,
760 }),
761 };
762
763 (Some(warning), next_idx)
764 }
765
766 /// Generate paragraph-based fixes
767 fn generate_paragraph_fixes(
768 &self,
769 ctx: &crate::lint_context::LintContext,
770 config: &MD013Config,
771 lines: &[&str],
772 ) -> Vec<LintWarning> {
773 let mut warnings = Vec::new();
774 let line_index = LineIndex::new(ctx.content);
775
776 // Detect the content's line ending style to preserve it in replacements.
777 // The LSP receives content from editors which may use CRLF (Windows).
778 // Replacements must match the original line endings to avoid false positives.
779 let line_ending = crate::utils::line_ending::detect_line_ending(ctx.content);
780
781 let mut i = 0;
782 while i < lines.len() {
783 let line_num = i + 1;
784
785 // Handle blockquote paragraphs with style-preserving reflow.
786 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
787 let (warning, next_idx) =
788 self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i, line_ending);
789 if let Some(warning) = warning {
790 warnings.push(warning);
791 }
792 i = next_idx;
793 continue;
794 }
795
796 // Skip special structures (but NOT MkDocs containers - those get special handling)
797 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
798 info.in_code_block
799 || info.in_front_matter
800 || info.in_html_block
801 || info.in_html_comment
802 || info.in_esm_block
803 || info.in_jsx_expression
804 || info.in_mdx_comment
805 || info.in_mkdocstrings
806 });
807
808 if should_skip_due_to_line_info
809 || lines[i].trim().starts_with('#')
810 || TableUtils::is_potential_table_row(lines[i])
811 || lines[i].trim().is_empty()
812 || is_horizontal_rule(lines[i].trim())
813 || is_template_directive_only(lines[i])
814 || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
815 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
816 {
817 i += 1;
818 continue;
819 }
820
821 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
822 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
823 // Skip admonition/tab marker lines — only reflow their indented content
824 let current_line = lines[i];
825 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
826 i += 1;
827 continue;
828 }
829
830 let container_start = i;
831
832 // Detect the actual indent level from the first content line
833 // (supports nested admonitions with 8+ spaces)
834 let first_line = lines[i];
835 let base_indent_len = first_line.len() - first_line.trim_start().len();
836 let base_indent: String = " ".repeat(base_indent_len);
837
838 // Collect consecutive MkDocs container paragraph lines
839 let mut container_lines: Vec<&str> = Vec::new();
840 while i < lines.len() {
841 let current_line_num = i + 1;
842 let line_info = ctx.line_info(current_line_num);
843
844 // Stop if we leave the MkDocs container
845 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
846 break;
847 }
848
849 let line = lines[i];
850
851 // Stop at paragraph boundaries within the container
852 if line.trim().is_empty() {
853 break;
854 }
855
856 // Skip list items, code blocks, headings within containers
857 if is_list_item(line.trim())
858 || line.trim().starts_with("```")
859 || line.trim().starts_with("~~~")
860 || line.trim().starts_with('#')
861 {
862 break;
863 }
864
865 container_lines.push(line);
866 i += 1;
867 }
868
869 if container_lines.is_empty() {
870 // Must advance i to avoid infinite loop when we encounter
871 // non-paragraph content (code block, list, heading, empty line)
872 // at the start of an MkDocs container
873 i += 1;
874 continue;
875 }
876
877 // Strip the base indent from each line and join for reflow
878 let stripped_lines: Vec<&str> = container_lines
879 .iter()
880 .map(|line| {
881 if line.starts_with(&base_indent) {
882 &line[base_indent_len..]
883 } else {
884 line.trim_start()
885 }
886 })
887 .collect();
888 let paragraph_text = stripped_lines.join(" ");
889
890 // Check if reflow is needed
891 let needs_reflow = match config.reflow_mode {
892 ReflowMode::Normalize => container_lines.len() > 1,
893 ReflowMode::SentencePerLine => {
894 let sentences = split_into_sentences(¶graph_text);
895 sentences.len() > 1 || container_lines.len() > 1
896 }
897 ReflowMode::SemanticLineBreaks => {
898 let sentences = split_into_sentences(¶graph_text);
899 sentences.len() > 1
900 || container_lines.len() > 1
901 || container_lines
902 .iter()
903 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
904 }
905 ReflowMode::Default => container_lines
906 .iter()
907 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
908 };
909
910 if !needs_reflow {
911 continue;
912 }
913
914 // Calculate byte range for this container paragraph
915 let start_range = line_index.whole_line_range(container_start + 1);
916 let end_line = container_start + container_lines.len() - 1;
917 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
918 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
919 } else {
920 line_index.whole_line_range(end_line + 1)
921 };
922 let byte_range = start_range.start..end_range.end;
923
924 // Reflow with adjusted line length (accounting for the 4-space indent)
925 let reflow_line_length = if config.line_length.is_unlimited() {
926 usize::MAX
927 } else {
928 config.line_length.get().saturating_sub(base_indent_len).max(1)
929 };
930 let reflow_options = crate::utils::text_reflow::ReflowOptions {
931 line_length: reflow_line_length,
932 break_on_sentences: true,
933 preserve_breaks: false,
934 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
935 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
936 abbreviations: config.abbreviations_for_reflow(),
937 length_mode: self.reflow_length_mode(),
938 };
939 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
940
941 // Re-add the 4-space indent to each reflowed line
942 let reflowed_with_indent: Vec<String> =
943 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
944 let reflowed_text = reflowed_with_indent.join(line_ending);
945
946 // Preserve trailing newline
947 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
948 format!("{reflowed_text}{line_ending}")
949 } else {
950 reflowed_text
951 };
952
953 // Only generate a warning if the replacement is different
954 let original_text = &ctx.content[byte_range.clone()];
955 if original_text != replacement {
956 warnings.push(LintWarning {
957 rule_name: Some(self.name().to_string()),
958 message: format!(
959 "Line length {} exceeds {} characters (in MkDocs container)",
960 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
961 config.line_length.get()
962 ),
963 line: container_start + 1,
964 column: 1,
965 end_line: end_line + 1,
966 end_column: lines[end_line].len() + 1,
967 severity: Severity::Warning,
968 fix: Some(crate::rule::Fix {
969 range: byte_range,
970 replacement,
971 }),
972 });
973 }
974 continue;
975 }
976
977 // Helper function to detect semantic line markers
978 let is_semantic_line = |content: &str| -> bool {
979 let trimmed = content.trim_start();
980 let semantic_markers = [
981 "NOTE:",
982 "WARNING:",
983 "IMPORTANT:",
984 "CAUTION:",
985 "TIP:",
986 "DANGER:",
987 "HINT:",
988 "INFO:",
989 ];
990 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
991 };
992
993 // Helper function to detect fence markers (opening or closing)
994 let is_fence_marker = |content: &str| -> bool {
995 let trimmed = content.trim_start();
996 trimmed.starts_with("```") || trimmed.starts_with("~~~")
997 };
998
999 // Check if this is a list item - handle it specially
1000 let trimmed = lines[i].trim();
1001 if is_list_item(trimmed) {
1002 // Collect the entire list item including continuation lines
1003 let list_start = i;
1004 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
1005 let marker_len = marker.len();
1006
1007 // Track lines and their types (content, code block, fence, nested list)
/// Classification of one physical line collected for the list item being processed.
///
/// The first line's content (list marker removed) is stored as `Content`; each
/// continuation line is mapped to exactly one variant by the collection loop below.
#[derive(Clone)]
enum LineType {
    /// Paragraph text with the list indentation removed.
    Content(String),
    /// Line kept verbatim as code: content (indent stripped) and original indent.
    /// Used for lines flagged `in_code_block`, fence marker lines, and lines indented
    /// more than `marker_len + 3`.
    CodeBlock(String, usize),
    /// Nested list item line: content (indent stripped) and original indent.
    NestedListItem(String, usize),
    /// Lines starting with NOTE:, WARNING:, etc. that should stay separate.
    SemanticLine(String),
    /// MkDocs Snippets delimiters (-8<-) that must stay on their own line.
    SnippetLine(String),
    /// Quarto/Pandoc div markers (::: opening or closing).
    DivMarker(String),
    /// Blank line that is followed by a non-blank line indented at least `marker_len`.
    Empty,
}
1018
1019 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1020 i += 1;
1021
1022 // Collect continuation lines using ctx.lines for metadata
1023 while i < lines.len() {
1024 let line_info = &ctx.lines[i];
1025
1026 // Use pre-computed is_blank from ctx
1027 if line_info.is_blank {
1028 // Empty line - check if next line is indented (part of list item)
1029 if i + 1 < lines.len() {
1030 let next_info = &ctx.lines[i + 1];
1031
1032 // Check if next line is indented enough to be continuation
1033 if !next_info.is_blank && next_info.indent >= marker_len {
1034 // This blank line is between paragraphs/blocks in the list item
1035 list_item_lines.push(LineType::Empty);
1036 i += 1;
1037 continue;
1038 }
1039 }
1040 // No indented line after blank, end of list item
1041 break;
1042 }
1043
1044 // Use pre-computed indent from ctx
1045 let indent = line_info.indent;
1046
1047 // Valid continuation must be indented at least marker_len
1048 if indent >= marker_len {
1049 let trimmed = line_info.content(ctx.content).trim();
1050
1051 // Use pre-computed in_code_block from ctx
1052 if line_info.in_code_block {
1053 list_item_lines.push(LineType::CodeBlock(
1054 line_info.content(ctx.content)[indent..].to_string(),
1055 indent,
1056 ));
1057 i += 1;
1058 continue;
1059 }
1060
1061 // Check if this is a SIBLING list item (breaks parent)
1062 // Nested lists are indented >= marker_len and are PART of the parent item
1063 // Siblings are at indent < marker_len (at or before parent marker)
1064 if is_list_item(trimmed) && indent < marker_len {
1065 // This is a sibling item at same or higher level - end parent item
1066 break;
1067 }
1068
1069 // Check if this is a NESTED list item marker
1070 // Nested lists should be processed separately UNLESS they're part of a
1071 // multi-paragraph list item (indicated by a blank line before them OR
1072 // it's a continuation of an already-started nested list)
1073 if is_list_item(trimmed) && indent >= marker_len {
1074 // Check if there was a blank line before this (multi-paragraph context)
1075 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
1076
1077 // Check if we've already seen nested list content (another nested item)
1078 let has_nested_content = list_item_lines.iter().any(|line| {
1079 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
1080 || matches!(line, LineType::NestedListItem(_, _))
1081 });
1082
1083 if !has_blank_before && !has_nested_content {
1084 // Single-paragraph context with no prior nested items: starts a new item
1085 // End parent collection; nested list will be processed next
1086 break;
1087 }
1088 // else: multi-paragraph context or continuation of nested list, keep collecting
1089 // Mark this as a nested list item to preserve its structure
1090 list_item_lines.push(LineType::NestedListItem(
1091 line_info.content(ctx.content)[indent..].to_string(),
1092 indent,
1093 ));
1094 i += 1;
1095 continue;
1096 }
1097
1098 // Normal continuation: marker_len to marker_len+3
1099 if indent <= marker_len + 3 {
1100 // Extract content (remove indentation and trailing whitespace)
1101 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1102 // See: https://github.com/rvben/rumdl/issues/76
1103 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1104
1105 // Check if this is a div marker (::: opening or closing)
1106 // These must be preserved on their own line, not merged into paragraphs
1107 if line_info.is_div_marker {
1108 list_item_lines.push(LineType::DivMarker(content));
1109 }
1110 // Check if this is a fence marker (opening or closing)
1111 // These should be treated as code block lines, not paragraph content
1112 else if is_fence_marker(&content) {
1113 list_item_lines.push(LineType::CodeBlock(content, indent));
1114 }
1115 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1116 else if is_semantic_line(&content) {
1117 list_item_lines.push(LineType::SemanticLine(content));
1118 }
1119 // Check if this is a snippet block delimiter (-8<- or --8<--)
1120 // These must be preserved on their own lines for MkDocs Snippets extension
1121 else if is_snippet_block_delimiter(&content) {
1122 list_item_lines.push(LineType::SnippetLine(content));
1123 } else {
1124 list_item_lines.push(LineType::Content(content));
1125 }
1126 i += 1;
1127 } else {
1128 // indent >= marker_len + 4: indented code block
1129 list_item_lines.push(LineType::CodeBlock(
1130 line_info.content(ctx.content)[indent..].to_string(),
1131 indent,
1132 ));
1133 i += 1;
1134 }
1135 } else {
1136 // Not indented enough, end of list item
1137 break;
1138 }
1139 }
1140
1141 let indent_size = marker_len;
1142 let expected_indent = " ".repeat(indent_size);
1143
1144 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
/// A run of collected list-item lines that is handled as one unit when the item is
/// rebuilt. Built from the `LineType` sequence by the loop that follows.
#[derive(Clone)]
enum Block {
    /// Consecutive content lines forming one paragraph.
    Paragraph(Vec<String>),
    /// Code lines that are emitted with their original indentation.
    Code {
        /// (content, indent) pairs; blank lines inside the block are stored as `("", 0)`.
        lines: Vec<(String, usize)>,
        /// Whether there was a blank line before this block.
        has_preceding_blank: bool,
    },
    /// (content, indent) pairs for nested list items; blank lines are stored as `("", 0)`.
    NestedList(Vec<(String, usize)>),
    /// Semantic markers like NOTE:, WARNING: that stay on their own line.
    SemanticLine(String),
    /// MkDocs Snippets delimiter that stays on its own line without extra spacing.
    SnippetLine(String),
    /// Quarto/Pandoc div marker (::: opening or closing) preserved on its own line.
    DivMarker(String),
    /// Lines collected between a block-level HTML opening tag and its closing tag.
    Html {
        /// HTML content preserved exactly as collected (blank lines stored as empty strings).
        lines: Vec<String>,
        /// Whether there was a blank line before this block.
        has_preceding_blank: bool,
    },
}
1161
// HTML tag detection helpers

/// Block-level HTML tags that should trigger HTML block detection.
///
/// Entries are lowercase; `is_block_html_opening_tag` lowercases the tag name it
/// extracts before looking it up here, so matching is effectively case-insensitive.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];
1207
1208 fn is_block_html_opening_tag(line: &str) -> Option<String> {
1209 let trimmed = line.trim();
1210
1211 // Check for HTML comments
1212 if trimmed.starts_with("<!--") {
1213 return Some("!--".to_string());
1214 }
1215
1216 // Check for opening tags
1217 if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
1218 // Extract tag name from <tagname ...> or <tagname>
1219 let after_bracket = &trimmed[1..];
1220 if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
1221 let tag_name = after_bracket[..end].to_lowercase();
1222
1223 // Only treat as block if it's a known block-level tag
1224 if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
1225 return Some(tag_name);
1226 }
1227 }
1228 }
1229 None
1230 }
1231
/// Returns true when `line` closes the HTML construct identified by `tag_name`.
///
/// * For the comment pseudo tag `"!--"` the trimmed line must end with `-->`.
/// * Otherwise the trimmed line must start with `</`, optionally followed by
///   whitespace, followed by exactly `tag_name` (ASCII case-insensitive). The name
///   ends at whitespace, `>`, `/` or the end of the line, so `</pre>` does not close
///   `p` and `</DIV>` closes a `div` whose name was lowercased when it was opened.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let Some(after_slash) = trimmed.strip_prefix("</") else {
        return false;
    };
    let after_slash = after_slash.trim_start();

    // Compare the complete tag name rather than a prefix of it
    let name_end = after_slash
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_slash.len());
    after_slash[..name_end].eq_ignore_ascii_case(tag_name)
}
1245
/// Returns true when `line` is an HTML construct that is complete on its own line,
/// i.e. no later closing tag should be awaited for it.
///
/// Three forms qualify:
/// * the trimmed line ends with `/>` (XML-style self-closing tag);
/// * the trimmed line is a comment that both starts with `<!--` and ends with `-->`;
/// * the trimmed line starts with a void element such as `<hr>`, whose tag is
///   terminated by `>` on the same line. Void elements never have a closing tag.
fn is_self_closing_tag(line: &str) -> bool {
    // Void elements as listed by the HTML standard; matched ASCII case-insensitively.
    const VOID_ELEMENTS: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source", "track", "wbr",
    ];

    let trimmed = line.trim();

    if trimmed.ends_with("/>") {
        return true;
    }

    // A comment opened and closed on the same line is already complete
    if trimmed.starts_with("<!--") {
        return trimmed.ends_with("-->");
    }

    let Some(after_bracket) = trimmed.strip_prefix('<') else {
        return false;
    };
    let Some(name_end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') else {
        return false;
    };
    let (name, rest) = after_bracket.split_at(name_end);

    // Require the tag itself to be closed on this line; attributes may otherwise
    // continue on the following line.
    rest.contains('>') && VOID_ELEMENTS.iter().any(|void| name.eq_ignore_ascii_case(void))
}
1250
1251 let mut blocks: Vec<Block> = Vec::new();
1252 let mut current_paragraph: Vec<String> = Vec::new();
1253 let mut current_code_block: Vec<(String, usize)> = Vec::new();
1254 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
1255 let mut current_html_block: Vec<String> = Vec::new();
1256 let mut html_tag_stack: Vec<String> = Vec::new();
1257 let mut in_code = false;
1258 let mut in_nested_list = false;
1259 let mut in_html_block = false;
1260 let mut had_preceding_blank = false; // Track if we just saw an empty line
1261 let mut code_block_has_preceding_blank = false; // Track blank before current code block
1262 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
1263
1264 for line in &list_item_lines {
1265 match line {
1266 LineType::Empty => {
1267 if in_code {
1268 current_code_block.push((String::new(), 0));
1269 } else if in_nested_list {
1270 current_nested_list.push((String::new(), 0));
1271 } else if in_html_block {
1272 // Allow blank lines inside HTML blocks
1273 current_html_block.push(String::new());
1274 } else if !current_paragraph.is_empty() {
1275 blocks.push(Block::Paragraph(current_paragraph.clone()));
1276 current_paragraph.clear();
1277 }
1278 // Mark that we saw a blank line
1279 had_preceding_blank = true;
1280 }
1281 LineType::Content(content) => {
1282 // Check if we're currently in an HTML block
1283 if in_html_block {
1284 current_html_block.push(content.clone());
1285
1286 // Check if this line closes any open HTML tags
1287 if let Some(last_tag) = html_tag_stack.last() {
1288 if is_html_closing_tag(content, last_tag) {
1289 html_tag_stack.pop();
1290
1291 // If stack is empty, HTML block is complete
1292 if html_tag_stack.is_empty() {
1293 blocks.push(Block::Html {
1294 lines: current_html_block.clone(),
1295 has_preceding_blank: html_block_has_preceding_blank,
1296 });
1297 current_html_block.clear();
1298 in_html_block = false;
1299 }
1300 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1301 // Nested opening tag within HTML block
1302 if !is_self_closing_tag(content) {
1303 html_tag_stack.push(new_tag);
1304 }
1305 }
1306 }
1307 had_preceding_blank = false;
1308 } else {
1309 // Not in HTML block - check if this line starts one
1310 if let Some(tag_name) = is_block_html_opening_tag(content) {
1311 // Flush current paragraph before starting HTML block
1312 if in_code {
1313 blocks.push(Block::Code {
1314 lines: current_code_block.clone(),
1315 has_preceding_blank: code_block_has_preceding_blank,
1316 });
1317 current_code_block.clear();
1318 in_code = false;
1319 } else if in_nested_list {
1320 blocks.push(Block::NestedList(current_nested_list.clone()));
1321 current_nested_list.clear();
1322 in_nested_list = false;
1323 } else if !current_paragraph.is_empty() {
1324 blocks.push(Block::Paragraph(current_paragraph.clone()));
1325 current_paragraph.clear();
1326 }
1327
1328 // Start new HTML block
1329 in_html_block = true;
1330 html_block_has_preceding_blank = had_preceding_blank;
1331 current_html_block.push(content.clone());
1332
1333 // Check if it's self-closing or needs a closing tag
1334 if is_self_closing_tag(content) {
1335 // Self-closing tag - complete the HTML block immediately
1336 blocks.push(Block::Html {
1337 lines: current_html_block.clone(),
1338 has_preceding_blank: html_block_has_preceding_blank,
1339 });
1340 current_html_block.clear();
1341 in_html_block = false;
1342 } else {
1343 // Regular opening tag - push to stack
1344 html_tag_stack.push(tag_name);
1345 }
1346 } else {
1347 // Regular content line - add to paragraph
1348 if in_code {
1349 // Switching from code to content
1350 blocks.push(Block::Code {
1351 lines: current_code_block.clone(),
1352 has_preceding_blank: code_block_has_preceding_blank,
1353 });
1354 current_code_block.clear();
1355 in_code = false;
1356 } else if in_nested_list {
1357 // Switching from nested list to content
1358 blocks.push(Block::NestedList(current_nested_list.clone()));
1359 current_nested_list.clear();
1360 in_nested_list = false;
1361 }
1362 current_paragraph.push(content.clone());
1363 }
1364 had_preceding_blank = false; // Reset after content
1365 }
1366 }
1367 LineType::CodeBlock(content, indent) => {
1368 if in_nested_list {
1369 // Switching from nested list to code
1370 blocks.push(Block::NestedList(current_nested_list.clone()));
1371 current_nested_list.clear();
1372 in_nested_list = false;
1373 } else if in_html_block {
1374 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1375 blocks.push(Block::Html {
1376 lines: current_html_block.clone(),
1377 has_preceding_blank: html_block_has_preceding_blank,
1378 });
1379 current_html_block.clear();
1380 html_tag_stack.clear();
1381 in_html_block = false;
1382 }
1383 if !in_code {
1384 // Switching from content to code
1385 if !current_paragraph.is_empty() {
1386 blocks.push(Block::Paragraph(current_paragraph.clone()));
1387 current_paragraph.clear();
1388 }
1389 in_code = true;
1390 // Record whether there was a blank line before this code block
1391 code_block_has_preceding_blank = had_preceding_blank;
1392 }
1393 current_code_block.push((content.clone(), *indent));
1394 had_preceding_blank = false; // Reset after code
1395 }
1396 LineType::NestedListItem(content, indent) => {
1397 if in_code {
1398 // Switching from code to nested list
1399 blocks.push(Block::Code {
1400 lines: current_code_block.clone(),
1401 has_preceding_blank: code_block_has_preceding_blank,
1402 });
1403 current_code_block.clear();
1404 in_code = false;
1405 } else if in_html_block {
1406 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1407 blocks.push(Block::Html {
1408 lines: current_html_block.clone(),
1409 has_preceding_blank: html_block_has_preceding_blank,
1410 });
1411 current_html_block.clear();
1412 html_tag_stack.clear();
1413 in_html_block = false;
1414 }
1415 if !in_nested_list {
1416 // Switching from content to nested list
1417 if !current_paragraph.is_empty() {
1418 blocks.push(Block::Paragraph(current_paragraph.clone()));
1419 current_paragraph.clear();
1420 }
1421 in_nested_list = true;
1422 }
1423 current_nested_list.push((content.clone(), *indent));
1424 had_preceding_blank = false; // Reset after nested list
1425 }
1426 LineType::SemanticLine(content) => {
1427 // Semantic lines are standalone - flush any current block and add as separate block
1428 if in_code {
1429 blocks.push(Block::Code {
1430 lines: current_code_block.clone(),
1431 has_preceding_blank: code_block_has_preceding_blank,
1432 });
1433 current_code_block.clear();
1434 in_code = false;
1435 } else if in_nested_list {
1436 blocks.push(Block::NestedList(current_nested_list.clone()));
1437 current_nested_list.clear();
1438 in_nested_list = false;
1439 } else if in_html_block {
1440 blocks.push(Block::Html {
1441 lines: current_html_block.clone(),
1442 has_preceding_blank: html_block_has_preceding_blank,
1443 });
1444 current_html_block.clear();
1445 html_tag_stack.clear();
1446 in_html_block = false;
1447 } else if !current_paragraph.is_empty() {
1448 blocks.push(Block::Paragraph(current_paragraph.clone()));
1449 current_paragraph.clear();
1450 }
1451 // Add semantic line as its own block
1452 blocks.push(Block::SemanticLine(content.clone()));
1453 had_preceding_blank = false; // Reset after semantic line
1454 }
1455 LineType::SnippetLine(content) => {
1456 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1457 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1458 if in_code {
1459 blocks.push(Block::Code {
1460 lines: current_code_block.clone(),
1461 has_preceding_blank: code_block_has_preceding_blank,
1462 });
1463 current_code_block.clear();
1464 in_code = false;
1465 } else if in_nested_list {
1466 blocks.push(Block::NestedList(current_nested_list.clone()));
1467 current_nested_list.clear();
1468 in_nested_list = false;
1469 } else if in_html_block {
1470 blocks.push(Block::Html {
1471 lines: current_html_block.clone(),
1472 has_preceding_blank: html_block_has_preceding_blank,
1473 });
1474 current_html_block.clear();
1475 html_tag_stack.clear();
1476 in_html_block = false;
1477 } else if !current_paragraph.is_empty() {
1478 blocks.push(Block::Paragraph(current_paragraph.clone()));
1479 current_paragraph.clear();
1480 }
1481 // Add snippet line as its own block
1482 blocks.push(Block::SnippetLine(content.clone()));
1483 had_preceding_blank = false;
1484 }
1485 LineType::DivMarker(content) => {
1486 // Div markers (::: opening or closing) are standalone structural delimiters
1487 // Flush any current block and add as separate block
1488 if in_code {
1489 blocks.push(Block::Code {
1490 lines: current_code_block.clone(),
1491 has_preceding_blank: code_block_has_preceding_blank,
1492 });
1493 current_code_block.clear();
1494 in_code = false;
1495 } else if in_nested_list {
1496 blocks.push(Block::NestedList(current_nested_list.clone()));
1497 current_nested_list.clear();
1498 in_nested_list = false;
1499 } else if in_html_block {
1500 blocks.push(Block::Html {
1501 lines: current_html_block.clone(),
1502 has_preceding_blank: html_block_has_preceding_blank,
1503 });
1504 current_html_block.clear();
1505 html_tag_stack.clear();
1506 in_html_block = false;
1507 } else if !current_paragraph.is_empty() {
1508 blocks.push(Block::Paragraph(current_paragraph.clone()));
1509 current_paragraph.clear();
1510 }
1511 blocks.push(Block::DivMarker(content.clone()));
1512 had_preceding_blank = false;
1513 }
1514 }
1515 }
1516
1517 // Push remaining block
1518 if in_code && !current_code_block.is_empty() {
1519 blocks.push(Block::Code {
1520 lines: current_code_block,
1521 has_preceding_blank: code_block_has_preceding_blank,
1522 });
1523 } else if in_nested_list && !current_nested_list.is_empty() {
1524 blocks.push(Block::NestedList(current_nested_list));
1525 } else if in_html_block && !current_html_block.is_empty() {
1526 // If we still have an unclosed HTML block, push it anyway
1527 // (malformed HTML - missing closing tag)
1528 blocks.push(Block::Html {
1529 lines: current_html_block,
1530 has_preceding_blank: html_block_has_preceding_blank,
1531 });
1532 } else if !current_paragraph.is_empty() {
1533 blocks.push(Block::Paragraph(current_paragraph));
1534 }
1535
1536 // Helper: check if a line (raw source or stripped content) is exempt
1537 // from line-length checks. Link reference definitions are always exempt;
1538 // standalone link/image lines are exempt when strict mode is off.
1539 // Also checks content after stripping list markers, since list item
1540 // continuation lines may contain link ref defs.
1541 let is_exempt_line = |raw_line: &str| -> bool {
1542 let trimmed = raw_line.trim();
1543 // Link reference definitions: always exempt
1544 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
1545 return true;
1546 }
1547 // Also check after stripping list markers (for list item content)
1548 if is_list_item(trimmed) {
1549 let (_, content) = extract_list_marker_and_content(trimmed);
1550 let content_trimmed = content.trim();
1551 if content_trimmed.starts_with('[')
1552 && content_trimmed.contains("]:")
1553 && LINK_REF_PATTERN.is_match(content_trimmed)
1554 {
1555 return true;
1556 }
1557 }
1558 // Standalone link/image lines: exempt when not strict
1559 if !config.strict && is_standalone_link_or_image_line(raw_line) {
1560 return true;
1561 }
1562 false
1563 };
1564
1565 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1566 // Exclude link reference definitions and standalone link lines from content
1567 // so they don't pollute combined_content or trigger false reflow.
1568 let content_lines: Vec<String> = list_item_lines
1569 .iter()
1570 .filter_map(|line| {
1571 if let LineType::Content(s) = line {
1572 if is_exempt_line(s) {
1573 return None;
1574 }
1575 Some(s.clone())
1576 } else {
1577 None
1578 }
1579 })
1580 .collect();
1581
1582 // Check if we need to reflow this list item
1583 // We check the combined content to see if it exceeds length limits
1584 let combined_content = content_lines.join(" ").trim().to_string();
1585
1586 // Helper to check if we should reflow in normalize mode
1587 let should_normalize = || {
1588 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1589 // DO normalize if it has plain text content that spans multiple lines
1590 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1591 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1592 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1593 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1594 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1595 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1596
1597 // If we have structural blocks but no paragraphs, don't normalize
1598 if (has_nested_lists
1599 || has_code_blocks
1600 || has_semantic_lines
1601 || has_snippet_lines
1602 || has_div_markers)
1603 && !has_paragraphs
1604 {
1605 return false;
1606 }
1607
1608 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1609 if has_paragraphs {
1610 // Count only paragraphs that contain at least one non-exempt line.
1611 // Paragraphs consisting entirely of link ref defs or standalone links
1612 // should not trigger normalization.
1613 let paragraph_count = blocks
1614 .iter()
1615 .filter(|b| {
1616 if let Block::Paragraph(para_lines) = b {
1617 !para_lines.iter().all(|line| is_exempt_line(line))
1618 } else {
1619 false
1620 }
1621 })
1622 .count();
1623 if paragraph_count > 1 {
1624 // Multiple non-exempt paragraph blocks should be normalized
1625 return true;
1626 }
1627
1628 // Single paragraph block: normalize if it has multiple content lines
1629 if content_lines.len() > 1 {
1630 return true;
1631 }
1632 }
1633
1634 false
1635 };
1636
1637 let needs_reflow = match config.reflow_mode {
1638 ReflowMode::Normalize => {
1639 // Only reflow if:
1640 // 1. Any non-exempt paragraph, when joined, exceeds the limit, OR
1641 // 2. The list item should be normalized (has multi-line plain text)
1642 let any_paragraph_exceeds = blocks.iter().any(|block| {
1643 if let Block::Paragraph(para_lines) = block {
1644 if para_lines.iter().all(|line| is_exempt_line(line)) {
1645 return false;
1646 }
1647 let joined = para_lines.join(" ");
1648 let with_marker = format!("{}{}", " ".repeat(marker_len), joined.trim());
1649 self.calculate_effective_length(&with_marker) > config.line_length.get()
1650 } else {
1651 false
1652 }
1653 });
1654 if any_paragraph_exceeds {
1655 true
1656 } else {
1657 should_normalize()
1658 }
1659 }
1660 ReflowMode::SentencePerLine => {
1661 // Check if list item has multiple sentences
1662 let sentences = split_into_sentences(&combined_content);
1663 sentences.len() > 1
1664 }
1665 ReflowMode::SemanticLineBreaks => {
1666 let sentences = split_into_sentences(&combined_content);
1667 sentences.len() > 1
1668 || (list_start..i).any(|line_idx| {
1669 let line = lines[line_idx];
1670 let trimmed = line.trim();
1671 if trimmed.is_empty() || is_exempt_line(line) {
1672 return false;
1673 }
1674 self.calculate_effective_length(line) > config.line_length.get()
1675 })
1676 }
1677 ReflowMode::Default => {
1678 // In default mode, only reflow if any individual non-exempt line exceeds limit
1679 (list_start..i).any(|line_idx| {
1680 let line = lines[line_idx];
1681 let trimmed = line.trim();
1682 // Skip blank lines and exempt lines
1683 if trimmed.is_empty() || is_exempt_line(line) {
1684 return false;
1685 }
1686 self.calculate_effective_length(line) > config.line_length.get()
1687 })
1688 }
1689 };
1690
1691 if needs_reflow {
1692 let start_range = line_index.whole_line_range(list_start + 1);
1693 let end_line = i - 1;
1694 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1695 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1696 } else {
1697 line_index.whole_line_range(end_line + 1)
1698 };
1699 let byte_range = start_range.start..end_range.end;
1700
1701 // Reflow each block (paragraphs only, preserve code blocks)
1702 // When line_length = 0 (no limit), use a very large value for reflow
1703 let reflow_line_length = if config.line_length.is_unlimited() {
1704 usize::MAX
1705 } else {
1706 config.line_length.get().saturating_sub(indent_size).max(1)
1707 };
1708 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1709 line_length: reflow_line_length,
1710 break_on_sentences: true,
1711 preserve_breaks: false,
1712 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1713 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1714 abbreviations: config.abbreviations_for_reflow(),
1715 length_mode: self.reflow_length_mode(),
1716 };
1717
1718 let mut result: Vec<String> = Vec::new();
1719 let mut is_first_block = true;
1720
1721 for (block_idx, block) in blocks.iter().enumerate() {
1722 match block {
1723 Block::Paragraph(para_lines) => {
1724 // If every line in this paragraph is exempt (link ref defs,
1725 // standalone links), preserve the paragraph verbatim instead
1726 // of reflowing it. Reflowing would corrupt link ref defs.
1727 let all_exempt = para_lines.iter().all(|line| is_exempt_line(line));
1728
1729 if all_exempt {
1730 for (idx, line) in para_lines.iter().enumerate() {
1731 if is_first_block && idx == 0 {
1732 result.push(format!("{marker}{line}"));
1733 is_first_block = false;
1734 } else {
1735 result.push(format!("{expected_indent}{line}"));
1736 }
1737 }
1738 } else {
1739 // Split the paragraph into segments at hard break boundaries
1740 // Each segment can be reflowed independently
1741 let segments = split_into_segments(para_lines);
1742
1743 for (segment_idx, segment) in segments.iter().enumerate() {
1744 // Check if this segment ends with a hard break and what type
1745 let hard_break_type = segment.last().and_then(|line| {
1746 let line = line.strip_suffix('\r').unwrap_or(line);
1747 if line.ends_with('\\') {
1748 Some("\\")
1749 } else if line.ends_with(" ") {
1750 Some(" ")
1751 } else {
1752 None
1753 }
1754 });
1755
1756 // Join and reflow the segment (removing the hard break marker for processing)
1757 let segment_for_reflow: Vec<String> = segment
1758 .iter()
1759 .map(|line| {
1760 // Strip hard break marker (2 spaces or backslash) for reflow processing
1761 if line.ends_with('\\') {
1762 line[..line.len() - 1].trim_end().to_string()
1763 } else if line.ends_with(" ") {
1764 line[..line.len() - 2].trim_end().to_string()
1765 } else {
1766 line.clone()
1767 }
1768 })
1769 .collect();
1770
1771 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1772 if !segment_text.is_empty() {
1773 let reflowed =
1774 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1775
1776 if is_first_block && segment_idx == 0 {
1777 // First segment of first block starts with marker
1778 result.push(format!("{marker}{}", reflowed[0]));
1779 for line in reflowed.iter().skip(1) {
1780 result.push(format!("{expected_indent}{line}"));
1781 }
1782 is_first_block = false;
1783 } else {
1784 // Subsequent segments
1785 for line in reflowed {
1786 result.push(format!("{expected_indent}{line}"));
1787 }
1788 }
1789
1790 // If this segment had a hard break, add it back to the last line
1791 // Preserve the original hard break format (backslash or two spaces)
1792 if let Some(break_marker) = hard_break_type
1793 && let Some(last_line) = result.last_mut()
1794 {
1795 last_line.push_str(break_marker);
1796 }
1797 }
1798 }
1799 }
1800
1801 // Add blank line after paragraph block if there's a next block.
1802 // Check if next block is a code block that doesn't want a preceding blank.
1803 // Also don't add blank lines before snippet lines (they should stay tight).
1804 // Only add if not already ending with one (avoids double blanks).
1805 if block_idx < blocks.len() - 1 {
1806 let next_block = &blocks[block_idx + 1];
1807 let should_add_blank = match next_block {
1808 Block::Code {
1809 has_preceding_blank, ..
1810 } => *has_preceding_blank,
1811 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1812 _ => true, // For all other blocks, add blank line
1813 };
1814 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1815 {
1816 result.push(String::new());
1817 }
1818 }
1819 }
1820 Block::Code {
1821 lines: code_lines,
1822 has_preceding_blank: _,
1823 } => {
1824 // Preserve code blocks as-is with original indentation
1825 // NOTE: Blank line before code block is handled by the previous block
1826 // (see paragraph block's logic above)
1827
1828 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1829 if is_first_block && idx == 0 {
1830 // First line of first block gets marker
1831 result.push(format!(
1832 "{marker}{}",
1833 " ".repeat(orig_indent - marker_len) + content
1834 ));
1835 is_first_block = false;
1836 } else if content.is_empty() {
1837 result.push(String::new());
1838 } else {
1839 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1840 }
1841 }
1842 }
1843 Block::NestedList(nested_items) => {
1844 // Preserve nested list items as-is with original indentation.
1845 // Only add blank before if not already ending with one (avoids
1846 // double blanks when the preceding block already added one).
1847 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1848 result.push(String::new());
1849 }
1850
1851 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1852 if is_first_block && idx == 0 {
1853 // First line of first block gets marker
1854 result.push(format!(
1855 "{marker}{}",
1856 " ".repeat(orig_indent - marker_len) + content
1857 ));
1858 is_first_block = false;
1859 } else if content.is_empty() {
1860 result.push(String::new());
1861 } else {
1862 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1863 }
1864 }
1865
1866 // Add blank line after nested list if there's a next block.
1867 // Only add if not already ending with one (avoids double blanks
1868 // when the last nested item was already a blank line).
1869 if block_idx < blocks.len() - 1 {
1870 let next_block = &blocks[block_idx + 1];
1871 let should_add_blank = match next_block {
1872 Block::Code {
1873 has_preceding_blank, ..
1874 } => *has_preceding_blank,
1875 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1876 _ => true, // For all other blocks, add blank line
1877 };
1878 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1879 {
1880 result.push(String::new());
1881 }
1882 }
1883 }
1884 Block::SemanticLine(content) => {
1885 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
1886 // Only add blank before if not already ending with one.
1887 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
1888 result.push(String::new());
1889 }
1890
1891 if is_first_block {
1892 // First block starts with marker
1893 result.push(format!("{marker}{content}"));
1894 is_first_block = false;
1895 } else {
1896 // Subsequent blocks use expected indent
1897 result.push(format!("{expected_indent}{content}"));
1898 }
1899
1900 // Add blank line after semantic line if there's a next block.
1901 // Only add if not already ending with one.
1902 if block_idx < blocks.len() - 1 {
1903 let next_block = &blocks[block_idx + 1];
1904 let should_add_blank = match next_block {
1905 Block::Code {
1906 has_preceding_blank, ..
1907 } => *has_preceding_blank,
1908 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1909 _ => true, // For all other blocks, add blank line
1910 };
1911 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1912 {
1913 result.push(String::new());
1914 }
1915 }
1916 }
1917 Block::SnippetLine(content) => {
1918 // Preserve snippet delimiters (-8<-) as-is on their own line
1919 // Unlike semantic lines, snippet lines don't add extra blank lines
1920 if is_first_block {
1921 // First block starts with marker
1922 result.push(format!("{marker}{content}"));
1923 is_first_block = false;
1924 } else {
1925 // Subsequent blocks use expected indent
1926 result.push(format!("{expected_indent}{content}"));
1927 }
1928 // No blank lines added before or after snippet delimiters
1929 }
1930 Block::DivMarker(content) => {
1931 // Preserve div markers (::: opening or closing) as-is on their own line
1932 if is_first_block {
1933 result.push(format!("{marker}{content}"));
1934 is_first_block = false;
1935 } else {
1936 result.push(format!("{expected_indent}{content}"));
1937 }
1938 }
1939 Block::Html {
1940 lines: html_lines,
1941 has_preceding_blank: _,
1942 } => {
1943 // Preserve HTML blocks exactly as-is with original indentation
1944 // NOTE: Blank line before HTML block is handled by the previous block
1945
1946 for (idx, line) in html_lines.iter().enumerate() {
1947 if is_first_block && idx == 0 {
1948 // First line of first block gets marker
1949 result.push(format!("{marker}{line}"));
1950 is_first_block = false;
1951 } else if line.is_empty() {
1952 // Preserve blank lines inside HTML blocks
1953 result.push(String::new());
1954 } else {
1955 // Preserve lines with their original content (already includes indentation)
1956 result.push(format!("{expected_indent}{line}"));
1957 }
1958 }
1959
1960 // Add blank line after HTML block if there's a next block.
1961 // Only add if not already ending with one (avoids double blanks
1962 // when the HTML block itself contained a trailing blank line).
1963 if block_idx < blocks.len() - 1 {
1964 let next_block = &blocks[block_idx + 1];
1965 let should_add_blank = match next_block {
1966 Block::Code {
1967 has_preceding_blank, ..
1968 } => *has_preceding_blank,
1969 Block::Html {
1970 has_preceding_blank, ..
1971 } => *has_preceding_blank,
1972 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1973 _ => true, // For all other blocks, add blank line
1974 };
1975 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1976 {
1977 result.push(String::new());
1978 }
1979 }
1980 }
1981 }
1982 }
1983
1984 let reflowed_text = result.join(line_ending);
1985
1986 // Preserve trailing newline
1987 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1988 format!("{reflowed_text}{line_ending}")
1989 } else {
1990 reflowed_text
1991 };
1992
1993 // Get the original text to compare
1994 let original_text = &ctx.content[byte_range.clone()];
1995
1996 // Only generate a warning if the replacement is different from the original
1997 if original_text != replacement {
1998 // Generate an appropriate message based on why reflow is needed
1999 let message = match config.reflow_mode {
2000 ReflowMode::SentencePerLine => {
2001 let num_sentences = split_into_sentences(&combined_content).len();
2002 let num_lines = content_lines.len();
2003 if num_lines == 1 {
2004 // Single line with multiple sentences
2005 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2006 } else {
2007 // Multiple lines - could be split sentences or mixed
2008 format!(
2009 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
2010 )
2011 }
2012 }
2013 ReflowMode::SemanticLineBreaks => {
2014 let num_sentences = split_into_sentences(&combined_content).len();
2015 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
2016 }
2017 ReflowMode::Normalize => {
2018 // Find the longest non-exempt paragraph when joined
2019 let max_para_length = blocks
2020 .iter()
2021 .filter_map(|block| {
2022 if let Block::Paragraph(para_lines) = block {
2023 if para_lines.iter().all(|line| is_exempt_line(line)) {
2024 return None;
2025 }
2026 let joined = para_lines.join(" ");
2027 let with_indent = format!("{}{}", " ".repeat(marker_len), joined.trim());
2028 Some(self.calculate_effective_length(&with_indent))
2029 } else {
2030 None
2031 }
2032 })
2033 .max()
2034 .unwrap_or(0);
2035 if max_para_length > config.line_length.get() {
2036 format!(
2037 "Line length {} exceeds {} characters",
2038 max_para_length,
2039 config.line_length.get()
2040 )
2041 } else {
2042 "Multi-line content can be normalized".to_string()
2043 }
2044 }
2045 ReflowMode::Default => {
2046 // Report the actual longest non-exempt line, not the combined content
2047 let max_length = (list_start..i)
2048 .filter(|&line_idx| {
2049 let line = lines[line_idx];
2050 let trimmed = line.trim();
2051 !trimmed.is_empty() && !is_exempt_line(line)
2052 })
2053 .map(|line_idx| self.calculate_effective_length(lines[line_idx]))
2054 .max()
2055 .unwrap_or(0);
2056 format!(
2057 "Line length {} exceeds {} characters",
2058 max_length,
2059 config.line_length.get()
2060 )
2061 }
2062 };
2063
2064 warnings.push(LintWarning {
2065 rule_name: Some(self.name().to_string()),
2066 message,
2067 line: list_start + 1,
2068 column: 1,
2069 end_line: end_line + 1,
2070 end_column: lines[end_line].len() + 1,
2071 severity: Severity::Warning,
2072 fix: Some(crate::rule::Fix {
2073 range: byte_range,
2074 replacement,
2075 }),
2076 });
2077 }
2078 }
2079 continue;
2080 }
2081
2082 // Found start of a paragraph - collect all lines in it
2083 let paragraph_start = i;
2084 let mut paragraph_lines = vec![lines[i]];
2085 i += 1;
2086
2087 while i < lines.len() {
2088 let next_line = lines[i];
2089 let next_line_num = i + 1;
2090 let next_trimmed = next_line.trim();
2091
2092 // Stop at paragraph boundaries
2093 if next_trimmed.is_empty()
2094 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
2095 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
2096 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
2097 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
2098 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
2099 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
2100 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
2101 || ctx
2102 .line_info(next_line_num)
2103 .is_some_and(|info| info.in_mkdocs_container())
2104 || (next_line_num > 0
2105 && next_line_num <= ctx.lines.len()
2106 && ctx.lines[next_line_num - 1].blockquote.is_some())
2107 || next_trimmed.starts_with('#')
2108 || TableUtils::is_potential_table_row(next_line)
2109 || is_list_item(next_trimmed)
2110 || is_horizontal_rule(next_trimmed)
2111 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
2112 || is_template_directive_only(next_line)
2113 || is_standalone_attr_list(next_line)
2114 || is_snippet_block_delimiter(next_line)
2115 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
2116 {
2117 break;
2118 }
2119
2120 // Check if the previous line ends with a hard break (2+ spaces or backslash)
2121 if i > 0 && has_hard_break(lines[i - 1]) {
2122 // Don't include lines after hard breaks in the same paragraph
2123 break;
2124 }
2125
2126 paragraph_lines.push(next_line);
2127 i += 1;
2128 }
2129
2130 // Combine paragraph lines into a single string for processing
2131 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
2132 let paragraph_text = paragraph_lines.join(" ");
2133
2134 // Skip reflowing if this paragraph contains definition list items
2135 // Definition lists are multi-line structures that should not be joined
2136 let contains_definition_list = paragraph_lines
2137 .iter()
2138 .any(|line| crate::utils::is_definition_list_item(line));
2139
2140 if contains_definition_list {
2141 // Don't reflow definition lists - skip this paragraph
2142 i = paragraph_start + paragraph_lines.len();
2143 continue;
2144 }
2145
2146 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2147 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2148 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2149
2150 if contains_snippets {
2151 // Don't reflow Snippets blocks - skip this paragraph
2152 i = paragraph_start + paragraph_lines.len();
2153 continue;
2154 }
2155
2156 // Check if this paragraph needs reflowing
2157 let needs_reflow = match config.reflow_mode {
2158 ReflowMode::Normalize => {
2159 // In normalize mode, reflow multi-line paragraphs
2160 paragraph_lines.len() > 1
2161 }
2162 ReflowMode::SentencePerLine => {
2163 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
2164 // Note: we check the joined text because sentences can span multiple lines
2165 let sentences = split_into_sentences(¶graph_text);
2166
2167 // Always reflow if multiple sentences on one line
2168 if sentences.len() > 1 {
2169 true
2170 } else if paragraph_lines.len() > 1 {
2171 // For single-sentence paragraphs spanning multiple lines:
2172 // Reflow if they COULD fit on one line (respecting line-length constraint)
2173 if config.line_length.is_unlimited() {
2174 // No line-length constraint - always join single sentences
2175 true
2176 } else {
2177 // Only join if it fits within line-length
2178 let effective_length = self.calculate_effective_length(¶graph_text);
2179 effective_length <= config.line_length.get()
2180 }
2181 } else {
2182 false
2183 }
2184 }
2185 ReflowMode::SemanticLineBreaks => {
2186 let sentences = split_into_sentences(¶graph_text);
2187 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
2188 sentences.len() > 1
2189 || paragraph_lines.len() > 1
2190 || paragraph_lines
2191 .iter()
2192 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2193 }
2194 ReflowMode::Default => {
2195 // In default mode, only reflow if lines exceed limit
2196 paragraph_lines
2197 .iter()
2198 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2199 }
2200 };
2201
2202 if needs_reflow {
2203 // Calculate byte range for this paragraph
2204 // Use whole_line_range for each line and combine
2205 let start_range = line_index.whole_line_range(paragraph_start + 1);
2206 let end_line = paragraph_start + paragraph_lines.len() - 1;
2207
2208 // For the last line, we want to preserve any trailing newline
2209 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2210 // Last line without trailing newline - use line_text_range
2211 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2212 } else {
2213 // Not the last line or has trailing newline - use whole_line_range
2214 line_index.whole_line_range(end_line + 1)
2215 };
2216
2217 let byte_range = start_range.start..end_range.end;
2218
2219 // Check if the paragraph ends with a hard break and what type
2220 let hard_break_type = paragraph_lines.last().and_then(|line| {
2221 let line = line.strip_suffix('\r').unwrap_or(line);
2222 if line.ends_with('\\') {
2223 Some("\\")
2224 } else if line.ends_with(" ") {
2225 Some(" ")
2226 } else {
2227 None
2228 }
2229 });
2230
2231 // Reflow the paragraph
2232 // When line_length = 0 (no limit), use a very large value for reflow
2233 let reflow_line_length = if config.line_length.is_unlimited() {
2234 usize::MAX
2235 } else {
2236 config.line_length.get()
2237 };
2238 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2239 line_length: reflow_line_length,
2240 break_on_sentences: true,
2241 preserve_breaks: false,
2242 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2243 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2244 abbreviations: config.abbreviations_for_reflow(),
2245 length_mode: self.reflow_length_mode(),
2246 };
2247 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
2248
2249 // If the original paragraph ended with a hard break, preserve it
2250 // Preserve the original hard break format (backslash or two spaces)
2251 if let Some(break_marker) = hard_break_type
2252 && !reflowed.is_empty()
2253 {
2254 let last_idx = reflowed.len() - 1;
2255 if !has_hard_break(&reflowed[last_idx]) {
2256 reflowed[last_idx].push_str(break_marker);
2257 }
2258 }
2259
2260 let reflowed_text = reflowed.join(line_ending);
2261
2262 // Preserve trailing newline if the original paragraph had one
2263 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2264 format!("{reflowed_text}{line_ending}")
2265 } else {
2266 reflowed_text
2267 };
2268
2269 // Get the original text to compare
2270 let original_text = &ctx.content[byte_range.clone()];
2271
2272 // Only generate a warning if the replacement is different from the original
2273 if original_text != replacement {
2274 // Create warning with actual fix
2275 // In default mode, report the specific line that violates
2276 // In normalize mode, report the whole paragraph
2277 // In sentence-per-line mode, report the entire paragraph
2278 let (warning_line, warning_end_line) = match config.reflow_mode {
2279 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
2280 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
2281 // Highlight the entire paragraph that needs reformatting
2282 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
2283 }
2284 ReflowMode::Default => {
2285 // Find the first line that exceeds the limit
2286 let mut violating_line = paragraph_start;
2287 for (idx, line) in paragraph_lines.iter().enumerate() {
2288 if self.calculate_effective_length(line) > config.line_length.get() {
2289 violating_line = paragraph_start + idx;
2290 break;
2291 }
2292 }
2293 (violating_line + 1, violating_line + 1)
2294 }
2295 };
2296
2297 warnings.push(LintWarning {
2298 rule_name: Some(self.name().to_string()),
2299 message: match config.reflow_mode {
2300 ReflowMode::Normalize => format!(
2301 "Paragraph could be normalized to use line length of {} characters",
2302 config.line_length.get()
2303 ),
2304 ReflowMode::SentencePerLine => {
2305 let num_sentences = split_into_sentences(¶graph_text).len();
2306 if paragraph_lines.len() == 1 {
2307 // Single line with multiple sentences
2308 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2309 } else {
2310 let num_lines = paragraph_lines.len();
2311 // Multiple lines - could be split sentences or mixed
2312 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
2313 }
2314 },
2315 ReflowMode::SemanticLineBreaks => {
2316 let num_sentences = split_into_sentences(¶graph_text).len();
2317 format!(
2318 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
2319 )
2320 },
2321 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
2322 },
2323 line: warning_line,
2324 column: 1,
2325 end_line: warning_end_line,
2326 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
2327 severity: Severity::Warning,
2328 fix: Some(crate::rule::Fix {
2329 range: byte_range,
2330 replacement,
2331 }),
2332 });
2333 }
2334 }
2335 }
2336
2337 warnings
2338 }
2339
2340 /// Calculate string length based on the configured length mode
2341 fn calculate_string_length(&self, s: &str) -> usize {
2342 match self.config.length_mode {
2343 LengthMode::Chars => s.chars().count(),
2344 LengthMode::Visual => s.width(),
2345 LengthMode::Bytes => s.len(),
2346 }
2347 }
2348
2349 /// Calculate effective line length
2350 ///
2351 /// Returns the actual display length of the line using the configured length mode.
2352 fn calculate_effective_length(&self, line: &str) -> usize {
2353 self.calculate_string_length(line)
2354 }
2355
2356 /// Calculate line length with inline link/image URLs removed.
2357 ///
2358 /// For each inline link `[text](url)` or image `` on the line,
2359 /// computes the "savings" from removing the URL portion (keeping only `[text]`
2360 /// or `![alt]`). Returns `effective_length - total_savings`.
2361 ///
2362 /// Handles nested constructs (e.g., `[](url)`) by only counting the
2363 /// outermost construct to avoid double-counting.
2364 fn calculate_text_only_length(
2365 &self,
2366 effective_length: usize,
2367 line_number: usize,
2368 ctx: &crate::lint_context::LintContext,
2369 ) -> usize {
2370 let line_range = ctx.line_index.line_content_range(line_number);
2371 let line_byte_end = line_range.end;
2372
2373 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
2374 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
2375
2376 for link in &ctx.links {
2377 if link.line != line_number || link.is_reference {
2378 continue;
2379 }
2380 if !matches!(link.link_type, LinkType::Inline) {
2381 continue;
2382 }
2383 // Skip cross-line links
2384 if link.byte_end > line_byte_end {
2385 continue;
2386 }
2387 // `[text]` in configured length mode
2388 let text_only_len = 2 + self.calculate_string_length(&link.text);
2389 constructs.push((link.byte_offset, link.byte_end, text_only_len));
2390 }
2391
2392 for image in &ctx.images {
2393 if image.line != line_number || image.is_reference {
2394 continue;
2395 }
2396 if !matches!(image.link_type, LinkType::Inline) {
2397 continue;
2398 }
2399 // Skip cross-line images
2400 if image.byte_end > line_byte_end {
2401 continue;
2402 }
2403 // `![alt]` in configured length mode
2404 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
2405 constructs.push((image.byte_offset, image.byte_end, text_only_len));
2406 }
2407
2408 if constructs.is_empty() {
2409 return effective_length;
2410 }
2411
2412 // Sort by byte offset to handle overlapping/nested constructs
2413 constructs.sort_by_key(|&(start, _, _)| start);
2414
2415 let mut total_savings: usize = 0;
2416 let mut last_end: usize = 0;
2417
2418 for (start, end, text_only_len) in &constructs {
2419 // Skip constructs nested inside a previously counted one
2420 if *start < last_end {
2421 continue;
2422 }
2423 // Full construct length in configured length mode
2424 let full_source = &ctx.content[*start..*end];
2425 let full_len = self.calculate_string_length(full_source);
2426 total_savings += full_len.saturating_sub(*text_only_len);
2427 last_end = *end;
2428 }
2429
2430 effective_length.saturating_sub(total_savings)
2431 }
2432}