rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod helpers;
22pub mod md013_config;
23use crate::utils::is_template_directive_only;
24use helpers::{
25 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_list_item,
26 is_standalone_link_or_image_line, split_into_segments, trim_preserving_hard_break,
27};
28pub use md013_config::MD013Config;
29use md013_config::{LengthMode, ReflowMode};
30
31#[cfg(test)]
32mod tests;
33use unicode_width::UnicodeWidthStr;
34
/// Rule MD013: reports lines whose length is excessive for the configured limit.
///
/// All behavior is driven by the wrapped [`MD013Config`].
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings this rule instance was constructed with.
    pub(crate) config: MD013Config,
}
39
/// Blockquote paragraph line collected for reflow, with original line index for range computation.
struct CollectedBlockquoteLine {
    /// Zero-based index of this line in the document's line slice.
    line_idx: usize,
    /// Content and prefix data handed to the blockquote reflow helpers.
    data: BlockquoteLineData,
}
45
46impl MD013LineLength {
47 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
48 Self {
49 config: MD013Config {
50 line_length: crate::types::LineLength::new(line_length),
51 code_blocks,
52 tables,
53 headings,
54 paragraphs: true, // Default to true for backwards compatibility
55 strict,
56 reflow: false,
57 reflow_mode: ReflowMode::default(),
58 length_mode: LengthMode::default(),
59 abbreviations: Vec::new(),
60 },
61 }
62 }
63
64 pub fn from_config_struct(config: MD013Config) -> Self {
65 Self { config }
66 }
67
68 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
69 fn reflow_length_mode(&self) -> ReflowLengthMode {
70 match self.config.length_mode {
71 LengthMode::Chars => ReflowLengthMode::Chars,
72 LengthMode::Visual => ReflowLengthMode::Visual,
73 LengthMode::Bytes => ReflowLengthMode::Bytes,
74 }
75 }
76
77 fn should_ignore_line(
78 &self,
79 line: &str,
80 _lines: &[&str],
81 current_line: usize,
82 ctx: &crate::lint_context::LintContext,
83 ) -> bool {
84 if self.config.strict {
85 return false;
86 }
87
88 // Quick check for common patterns before expensive regex
89 let trimmed = line.trim();
90
91 // Only skip if the entire line is a URL (quick check first)
92 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
93 return true;
94 }
95
96 // Only skip if the entire line is an image reference (quick check first)
97 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
98 return true;
99 }
100
101 // Note: link reference definitions are handled as always-exempt (even in strict mode)
102 // in the main check loop, so they don't need to be checked here.
103
104 // Code blocks with long strings (only check if in code block)
105 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
106 && !trimmed.is_empty()
107 && !line.contains(' ')
108 && !line.contains('\t')
109 {
110 return true;
111 }
112
113 false
114 }
115
116 /// Check if rule should skip based on provided config (used for inline config support)
117 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
118 // Skip if content is empty
119 if ctx.content.is_empty() {
120 return true;
121 }
122
123 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
124 if config.reflow
125 && (config.reflow_mode == ReflowMode::SentencePerLine
126 || config.reflow_mode == ReflowMode::SemanticLineBreaks
127 || config.reflow_mode == ReflowMode::Normalize)
128 {
129 return false;
130 }
131
132 // Quick check: if total content is shorter than line limit, definitely skip
133 if ctx.content.len() <= config.line_length.get() {
134 return true;
135 }
136
137 // Skip if no line exceeds the limit
138 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
139 }
140}
141
142impl Rule for MD013LineLength {
143 fn name(&self) -> &'static str {
144 "MD013"
145 }
146
147 fn description(&self) -> &'static str {
148 "Line length should not be excessive"
149 }
150
151 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
152 // Use pre-parsed inline config from LintContext
153 let config_override = ctx.inline_config().get_rule_config("MD013");
154
155 // Apply configuration override if present
156 let effective_config = if let Some(json_config) = config_override {
157 if let Some(obj) = json_config.as_object() {
158 let mut config = self.config.clone();
159 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
160 config.line_length = crate::types::LineLength::new(line_length as usize);
161 }
162 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
163 config.code_blocks = code_blocks;
164 }
165 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
166 config.tables = tables;
167 }
168 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
169 config.headings = headings;
170 }
171 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
172 config.strict = strict;
173 }
174 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
175 config.reflow = reflow;
176 }
177 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
178 config.reflow_mode = match reflow_mode {
179 "default" => ReflowMode::Default,
180 "normalize" => ReflowMode::Normalize,
181 "sentence-per-line" => ReflowMode::SentencePerLine,
182 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
183 _ => ReflowMode::default(),
184 };
185 }
186 config
187 } else {
188 self.config.clone()
189 }
190 } else {
191 self.config.clone()
192 };
193
194 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
195 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
196 if self.should_skip_with_config(ctx, &effective_config)
197 && !(effective_config.reflow
198 && (effective_config.reflow_mode == ReflowMode::Normalize
199 || effective_config.reflow_mode == ReflowMode::SentencePerLine
200 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
201 {
202 return Ok(Vec::new());
203 }
204
205 // Direct implementation without DocumentStructure
206 let mut warnings = Vec::new();
207
208 // Special handling: line_length = 0 means "no line length limit"
209 // Skip all line length checks, but still allow reflow if enabled
210 let skip_length_checks = effective_config.line_length.is_unlimited();
211
212 // Pre-filter lines that could be problematic to avoid processing all lines
213 let mut candidate_lines = Vec::new();
214 if !skip_length_checks {
215 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
216 // Skip front matter - it should never be linted
217 if line_info.in_front_matter {
218 continue;
219 }
220
221 // Quick length check first
222 if line_info.byte_len > effective_config.line_length.get() {
223 candidate_lines.push(line_idx);
224 }
225 }
226 }
227
228 // If no candidate lines and not in normalize or sentence-per-line mode, early return
229 if candidate_lines.is_empty()
230 && !(effective_config.reflow
231 && (effective_config.reflow_mode == ReflowMode::Normalize
232 || effective_config.reflow_mode == ReflowMode::SentencePerLine
233 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
234 {
235 return Ok(warnings);
236 }
237
238 let lines = ctx.raw_lines();
239
240 // Create a quick lookup set for heading lines
241 // We need this for both the heading skip check AND the paragraphs check
242 let heading_lines_set: std::collections::HashSet<usize> = ctx
243 .lines
244 .iter()
245 .enumerate()
246 .filter(|(_, line)| line.heading.is_some())
247 .map(|(idx, _)| idx + 1)
248 .collect();
249
250 // Use pre-computed table blocks from context
251 // We need this for both the table skip check AND the paragraphs check
252 let table_blocks = &ctx.table_blocks;
253 let mut table_lines_set = std::collections::HashSet::new();
254 for table in table_blocks {
255 table_lines_set.insert(table.header_line + 1);
256 table_lines_set.insert(table.delimiter_line + 1);
257 for &line in &table.content_lines {
258 table_lines_set.insert(line + 1);
259 }
260 }
261
262 // Process candidate lines for line length checks
263 for &line_idx in &candidate_lines {
264 let line_number = line_idx + 1;
265 let line = lines[line_idx];
266
267 // Calculate actual line length (used in warning messages)
268 let effective_length = self.calculate_effective_length(line);
269
270 // Use single line length limit for all content
271 let line_limit = effective_config.line_length.get();
272
273 // In non-strict mode, forgive the trailing non-whitespace run.
274 // If the line only exceeds the limit because of a long token at the end
275 // (URL, link chain, identifier), it passes. This matches markdownlint's
276 // behavior: line.replace(/\S*$/u, "#")
277 let check_length = if effective_config.strict {
278 effective_length
279 } else {
280 match line.rfind(char::is_whitespace) {
281 Some(pos) => {
282 let ws_char = line[pos..].chars().next().unwrap();
283 let prefix_end = pos + ws_char.len_utf8();
284 self.calculate_string_length(&line[..prefix_end]) + 1
285 }
286 None => 1, // No whitespace — entire line is a single token
287 }
288 };
289
290 // Skip lines where the check length is within the limit
291 if check_length <= line_limit {
292 continue;
293 }
294
295 // Semantic link understanding: suppress when excess comes entirely from inline URLs
296 if !effective_config.strict {
297 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
298 if text_only_length <= line_limit {
299 continue;
300 }
301 }
302
303 // Skip mkdocstrings and pymdown blocks (already handled by LintContext)
304 if ctx.lines[line_idx].in_mkdocstrings || ctx.lines[line_idx].in_pymdown_block {
305 continue;
306 }
307
308 // Link reference definitions are always exempt, even in strict mode.
309 // There's no way to shorten them without breaking the URL.
310 // Also check after stripping list markers, since list items may
311 // contain link ref defs as their content.
312 {
313 let trimmed = line.trim();
314 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
315 continue;
316 }
317 if is_list_item(trimmed) {
318 let (_, content) = extract_list_marker_and_content(trimmed);
319 let content_trimmed = content.trim();
320 if content_trimmed.starts_with('[')
321 && content_trimmed.contains("]:")
322 && LINK_REF_PATTERN.is_match(content_trimmed)
323 {
324 continue;
325 }
326 }
327 }
328
329 // Skip various block types efficiently
330 if !effective_config.strict {
331 // Lines whose only content is a link/image are exempt.
332 // After stripping list markers, blockquote markers, and emphasis,
333 // if only a link or image remains, there is no way to shorten it.
334 if is_standalone_link_or_image_line(line) {
335 continue;
336 }
337
338 // Skip setext heading underlines
339 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
340 continue;
341 }
342
343 // Skip block elements according to config flags
344 // The flags mean: true = check these elements, false = skip these elements
345 // So we skip when the flag is FALSE and the line is in that element type
346 if (!effective_config.headings && heading_lines_set.contains(&line_number))
347 || (!effective_config.code_blocks
348 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
349 || (!effective_config.tables && table_lines_set.contains(&line_number))
350 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
351 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
352 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
353 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
354 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
355 || ctx.line_info(line_number).is_some_and(|info| info.in_pymdown_block)
356 {
357 continue;
358 }
359
360 // Check if this is a paragraph/regular text line
361 // If paragraphs = false, skip lines that are NOT in special blocks
362 if !effective_config.paragraphs {
363 let is_special_block = heading_lines_set.contains(&line_number)
364 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
365 || table_lines_set.contains(&line_number)
366 || ctx.lines[line_number - 1].blockquote.is_some()
367 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
368 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
369 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
370 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
371 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
372 || ctx
373 .line_info(line_number)
374 .is_some_and(|info| info.in_mkdocs_container());
375
376 // Skip regular paragraph text when paragraphs = false
377 if !is_special_block {
378 continue;
379 }
380 }
381
382 // Skip lines that are only a URL, image ref, or link ref
383 if self.should_ignore_line(line, lines, line_idx, ctx) {
384 continue;
385 }
386 }
387
388 // In sentence-per-line mode, check if this is a single long sentence
389 // If so, emit a warning without a fix (user must manually rephrase)
390 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
391 let sentences = split_into_sentences(line.trim());
392 if sentences.len() == 1 {
393 // Single sentence that's too long - warn but don't auto-fix
394 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
395
396 let (start_line, start_col, end_line, end_col) =
397 calculate_excess_range(line_number, line, line_limit);
398
399 warnings.push(LintWarning {
400 rule_name: Some(self.name().to_string()),
401 message,
402 line: start_line,
403 column: start_col,
404 end_line,
405 end_column: end_col,
406 severity: Severity::Warning,
407 fix: None, // No auto-fix for long single sentences
408 });
409 continue;
410 }
411 // Multiple sentences will be handled by paragraph-based reflow
412 continue;
413 }
414
415 // In semantic-line-breaks mode, skip per-line checks —
416 // all reflow is handled at the paragraph level with cascading splits
417 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
418 continue;
419 }
420
421 // Don't provide fix for individual lines when reflow is enabled
422 // Paragraph-based fixes will be handled separately
423 let fix = None;
424
425 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
426
427 // Calculate precise character range for the excess portion
428 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
429
430 warnings.push(LintWarning {
431 rule_name: Some(self.name().to_string()),
432 message,
433 line: start_line,
434 column: start_col,
435 end_line,
436 end_column: end_col,
437 severity: Severity::Warning,
438 fix,
439 });
440 }
441
442 // If reflow is enabled, generate paragraph-based fixes
443 if effective_config.reflow {
444 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
445 // Merge paragraph warnings with line warnings, removing duplicates
446 for pw in paragraph_warnings {
447 // Remove any line warnings that overlap with this paragraph
448 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
449 warnings.push(pw);
450 }
451 }
452
453 Ok(warnings)
454 }
455
456 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
457 // For CLI usage, apply fixes from warnings
458 // LSP will use the warning-based fixes directly
459 let warnings = self.check(ctx)?;
460
461 // If there are no fixes, return content unchanged
462 if !warnings.iter().any(|w| w.fix.is_some()) {
463 return Ok(ctx.content.to_string());
464 }
465
466 // Apply warning-based fixes
467 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
468 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
469 }
470
471 fn as_any(&self) -> &dyn std::any::Any {
472 self
473 }
474
475 fn category(&self) -> RuleCategory {
476 RuleCategory::Whitespace
477 }
478
479 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
480 self.should_skip_with_config(ctx, &self.config)
481 }
482
483 fn default_config_section(&self) -> Option<(String, toml::Value)> {
484 let default_config = MD013Config::default();
485 let json_value = serde_json::to_value(&default_config).ok()?;
486 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
487
488 if let toml::Value::Table(table) = toml_value {
489 if !table.is_empty() {
490 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
491 } else {
492 None
493 }
494 } else {
495 None
496 }
497 }
498
499 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
500 let mut aliases = std::collections::HashMap::new();
501 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
502 Some(aliases)
503 }
504
505 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
506 where
507 Self: Sized,
508 {
509 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
510 // Use global line_length if rule-specific config still has default value
511 if rule_config.line_length.get() == 80 {
512 rule_config.line_length = config.global.line_length;
513 }
514 Box::new(Self::from_config_struct(rule_config))
515 }
516}
517
518impl MD013LineLength {
519 fn is_blockquote_content_boundary(
520 &self,
521 content: &str,
522 line_num: usize,
523 ctx: &crate::lint_context::LintContext,
524 ) -> bool {
525 let trimmed = content.trim();
526
527 trimmed.is_empty()
528 || ctx.line_info(line_num).is_some_and(|info| {
529 info.in_code_block
530 || info.in_front_matter
531 || info.in_html_block
532 || info.in_html_comment
533 || info.in_esm_block
534 || info.in_jsx_expression
535 || info.in_mdx_comment
536 || info.in_mkdocstrings
537 || info.in_pymdown_block
538 || info.in_mkdocs_container()
539 || info.is_div_marker
540 })
541 || trimmed.starts_with('#')
542 || trimmed.starts_with("```")
543 || trimmed.starts_with("~~~")
544 || trimmed.starts_with('>')
545 || TableUtils::is_potential_table_row(content)
546 || is_list_item(trimmed)
547 || is_horizontal_rule(trimmed)
548 || (trimmed.starts_with('[') && content.contains("]:"))
549 || is_template_directive_only(content)
550 || is_standalone_attr_list(content)
551 || is_snippet_block_delimiter(content)
552 || is_github_alert_marker(trimmed)
553 }
554
555 fn generate_blockquote_paragraph_fix(
556 &self,
557 ctx: &crate::lint_context::LintContext,
558 config: &MD013Config,
559 lines: &[&str],
560 line_index: &LineIndex,
561 start_idx: usize,
562 line_ending: &str,
563 ) -> (Option<LintWarning>, usize) {
564 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
565 return (None, start_idx + 1);
566 };
567 let target_level = start_bq.nesting_level;
568
569 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
570 let mut i = start_idx;
571
572 while i < lines.len() {
573 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
574 break;
575 }
576
577 let line_num = i + 1;
578 if line_num > ctx.lines.len() {
579 break;
580 }
581
582 if lines[i].trim().is_empty() {
583 break;
584 }
585
586 let line_bq = ctx.lines[i].blockquote.as_deref();
587 if let Some(bq) = line_bq {
588 if bq.nesting_level != target_level {
589 break;
590 }
591
592 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
593 break;
594 }
595
596 collected.push(CollectedBlockquoteLine {
597 line_idx: i,
598 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
599 });
600 i += 1;
601 continue;
602 }
603
604 let lazy_content = lines[i].trim_start();
605 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
606 break;
607 }
608
609 collected.push(CollectedBlockquoteLine {
610 line_idx: i,
611 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
612 });
613 i += 1;
614 }
615
616 if collected.is_empty() {
617 return (None, start_idx + 1);
618 }
619
620 let next_idx = i;
621 let paragraph_start = collected[0].line_idx;
622 let end_line = collected[collected.len() - 1].line_idx;
623 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
624 let paragraph_text = line_data
625 .iter()
626 .map(|d| d.content.as_str())
627 .collect::<Vec<_>>()
628 .join(" ");
629
630 let contains_definition_list = line_data
631 .iter()
632 .any(|d| crate::utils::is_definition_list_item(&d.content));
633 if contains_definition_list {
634 return (None, next_idx);
635 }
636
637 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
638 if contains_snippets {
639 return (None, next_idx);
640 }
641
642 let needs_reflow = match config.reflow_mode {
643 ReflowMode::Normalize => line_data.len() > 1,
644 ReflowMode::SentencePerLine => {
645 let sentences = split_into_sentences(¶graph_text);
646 sentences.len() > 1 || line_data.len() > 1
647 }
648 ReflowMode::SemanticLineBreaks => {
649 let sentences = split_into_sentences(¶graph_text);
650 sentences.len() > 1
651 || line_data.len() > 1
652 || collected
653 .iter()
654 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
655 }
656 ReflowMode::Default => collected
657 .iter()
658 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
659 };
660
661 if !needs_reflow {
662 return (None, next_idx);
663 }
664
665 let fallback_prefix = start_bq.prefix.clone();
666 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
667 let continuation_style = blockquote_continuation_style(&line_data);
668
669 let reflow_line_length = if config.line_length.is_unlimited() {
670 usize::MAX
671 } else {
672 config
673 .line_length
674 .get()
675 .saturating_sub(self.calculate_string_length(&explicit_prefix))
676 .max(1)
677 };
678
679 let reflow_options = crate::utils::text_reflow::ReflowOptions {
680 line_length: reflow_line_length,
681 break_on_sentences: true,
682 preserve_breaks: false,
683 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
684 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
685 abbreviations: config.abbreviations_for_reflow(),
686 length_mode: self.reflow_length_mode(),
687 attr_lists: ctx.flavor.supports_attr_lists(),
688 };
689
690 let reflowed_with_style =
691 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
692
693 if reflowed_with_style.is_empty() {
694 return (None, next_idx);
695 }
696
697 let reflowed_text = reflowed_with_style.join(line_ending);
698
699 let start_range = line_index.whole_line_range(paragraph_start + 1);
700 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
701 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
702 } else {
703 line_index.whole_line_range(end_line + 1)
704 };
705 let byte_range = start_range.start..end_range.end;
706
707 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
708 format!("{reflowed_text}{line_ending}")
709 } else {
710 reflowed_text
711 };
712
713 let original_text = &ctx.content[byte_range.clone()];
714 if original_text == replacement {
715 return (None, next_idx);
716 }
717
718 let (warning_line, warning_end_line) = match config.reflow_mode {
719 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
720 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
721 ReflowMode::Default => {
722 let violating_line = collected
723 .iter()
724 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
725 .map(|line| line.line_idx + 1)
726 .unwrap_or(paragraph_start + 1);
727 (violating_line, violating_line)
728 }
729 };
730
731 let warning = LintWarning {
732 rule_name: Some(self.name().to_string()),
733 message: match config.reflow_mode {
734 ReflowMode::Normalize => format!(
735 "Paragraph could be normalized to use line length of {} characters",
736 config.line_length.get()
737 ),
738 ReflowMode::SentencePerLine => {
739 let num_sentences = split_into_sentences(¶graph_text).len();
740 if line_data.len() == 1 {
741 format!("Line contains {num_sentences} sentences (one sentence per line required)")
742 } else {
743 let num_lines = line_data.len();
744 format!(
745 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
746 )
747 }
748 }
749 ReflowMode::SemanticLineBreaks => {
750 let num_sentences = split_into_sentences(¶graph_text).len();
751 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
752 }
753 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
754 },
755 line: warning_line,
756 column: 1,
757 end_line: warning_end_line,
758 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
759 severity: Severity::Warning,
760 fix: Some(crate::rule::Fix {
761 range: byte_range,
762 replacement,
763 }),
764 };
765
766 (Some(warning), next_idx)
767 }
768
769 /// Generate paragraph-based fixes
770 fn generate_paragraph_fixes(
771 &self,
772 ctx: &crate::lint_context::LintContext,
773 config: &MD013Config,
774 lines: &[&str],
775 ) -> Vec<LintWarning> {
776 let mut warnings = Vec::new();
777 let line_index = LineIndex::new(ctx.content);
778
779 // Detect the content's line ending style to preserve it in replacements.
780 // The LSP receives content from editors which may use CRLF (Windows).
781 // Replacements must match the original line endings to avoid false positives.
782 let line_ending = crate::utils::line_ending::detect_line_ending(ctx.content);
783
784 let mut i = 0;
785 while i < lines.len() {
786 let line_num = i + 1;
787
788 // Handle blockquote paragraphs with style-preserving reflow.
789 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
790 let (warning, next_idx) =
791 self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i, line_ending);
792 if let Some(warning) = warning {
793 warnings.push(warning);
794 }
795 i = next_idx;
796 continue;
797 }
798
799 // Skip special structures (but NOT MkDocs containers - those get special handling)
800 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
801 info.in_code_block
802 || info.in_front_matter
803 || info.in_html_block
804 || info.in_html_comment
805 || info.in_esm_block
806 || info.in_jsx_expression
807 || info.in_mdx_comment
808 || info.in_mkdocstrings
809 || info.in_pymdown_block
810 });
811
812 if should_skip_due_to_line_info
813 || lines[i].trim().starts_with('#')
814 || TableUtils::is_potential_table_row(lines[i])
815 || lines[i].trim().is_empty()
816 || is_horizontal_rule(lines[i].trim())
817 || is_template_directive_only(lines[i])
818 || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
819 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
820 {
821 i += 1;
822 continue;
823 }
824
825 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
826 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
827 // Skip admonition/tab marker lines — only reflow their indented content
828 let current_line = lines[i];
829 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
830 i += 1;
831 continue;
832 }
833
834 let container_start = i;
835
836 // Detect the actual indent level from the first content line
837 // (supports nested admonitions with 8+ spaces)
838 let first_line = lines[i];
839 let base_indent_len = first_line.len() - first_line.trim_start().len();
840 let base_indent: String = " ".repeat(base_indent_len);
841
842 // Collect consecutive MkDocs container paragraph lines
843 let mut container_lines: Vec<&str> = Vec::new();
844 while i < lines.len() {
845 let current_line_num = i + 1;
846 let line_info = ctx.line_info(current_line_num);
847
848 // Stop if we leave the MkDocs container
849 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
850 break;
851 }
852
853 let line = lines[i];
854
855 // Stop at paragraph boundaries within the container
856 if line.trim().is_empty() {
857 break;
858 }
859
860 // Skip list items, code blocks, headings within containers
861 if is_list_item(line.trim())
862 || line.trim().starts_with("```")
863 || line.trim().starts_with("~~~")
864 || line.trim().starts_with('#')
865 {
866 break;
867 }
868
869 container_lines.push(line);
870 i += 1;
871 }
872
873 if container_lines.is_empty() {
874 // Must advance i to avoid infinite loop when we encounter
875 // non-paragraph content (code block, list, heading, empty line)
876 // at the start of an MkDocs container
877 i += 1;
878 continue;
879 }
880
881 // Strip the base indent from each line and join for reflow
882 let stripped_lines: Vec<&str> = container_lines
883 .iter()
884 .map(|line| {
885 if line.starts_with(&base_indent) {
886 &line[base_indent_len..]
887 } else {
888 line.trim_start()
889 }
890 })
891 .collect();
892 let paragraph_text = stripped_lines.join(" ");
893
894 // Check if reflow is needed
895 let needs_reflow = match config.reflow_mode {
896 ReflowMode::Normalize => container_lines.len() > 1,
897 ReflowMode::SentencePerLine => {
898 let sentences = split_into_sentences(¶graph_text);
899 sentences.len() > 1 || container_lines.len() > 1
900 }
901 ReflowMode::SemanticLineBreaks => {
902 let sentences = split_into_sentences(¶graph_text);
903 sentences.len() > 1
904 || container_lines.len() > 1
905 || container_lines
906 .iter()
907 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
908 }
909 ReflowMode::Default => container_lines
910 .iter()
911 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
912 };
913
914 if !needs_reflow {
915 continue;
916 }
917
918 // Calculate byte range for this container paragraph
919 let start_range = line_index.whole_line_range(container_start + 1);
920 let end_line = container_start + container_lines.len() - 1;
921 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
922 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
923 } else {
924 line_index.whole_line_range(end_line + 1)
925 };
926 let byte_range = start_range.start..end_range.end;
927
928 // Reflow with adjusted line length (accounting for the 4-space indent)
929 let reflow_line_length = if config.line_length.is_unlimited() {
930 usize::MAX
931 } else {
932 config.line_length.get().saturating_sub(base_indent_len).max(1)
933 };
934 let reflow_options = crate::utils::text_reflow::ReflowOptions {
935 line_length: reflow_line_length,
936 break_on_sentences: true,
937 preserve_breaks: false,
938 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
939 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
940 abbreviations: config.abbreviations_for_reflow(),
941 length_mode: self.reflow_length_mode(),
942 attr_lists: ctx.flavor.supports_attr_lists(),
943 };
944 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
945
946 // Re-add the 4-space indent to each reflowed line
947 let reflowed_with_indent: Vec<String> =
948 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
949 let reflowed_text = reflowed_with_indent.join(line_ending);
950
951 // Preserve trailing newline
952 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
953 format!("{reflowed_text}{line_ending}")
954 } else {
955 reflowed_text
956 };
957
958 // Only generate a warning if the replacement is different
959 let original_text = &ctx.content[byte_range.clone()];
960 if original_text != replacement {
961 warnings.push(LintWarning {
962 rule_name: Some(self.name().to_string()),
963 message: format!(
964 "Line length {} exceeds {} characters (in MkDocs container)",
965 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
966 config.line_length.get()
967 ),
968 line: container_start + 1,
969 column: 1,
970 end_line: end_line + 1,
971 end_column: lines[end_line].len() + 1,
972 severity: Severity::Warning,
973 fix: Some(crate::rule::Fix {
974 range: byte_range,
975 replacement,
976 }),
977 });
978 }
979 continue;
980 }
981
982 // Helper function to detect semantic line markers
983 let is_semantic_line = |content: &str| -> bool {
984 let trimmed = content.trim_start();
985 let semantic_markers = [
986 "NOTE:",
987 "WARNING:",
988 "IMPORTANT:",
989 "CAUTION:",
990 "TIP:",
991 "DANGER:",
992 "HINT:",
993 "INFO:",
994 ];
995 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
996 };
997
998 // Helper function to detect fence markers (opening or closing)
999 let is_fence_marker = |content: &str| -> bool {
1000 let trimmed = content.trim_start();
1001 trimmed.starts_with("```") || trimmed.starts_with("~~~")
1002 };
1003
1004 // Check if this is a list item - handle it specially
1005 let trimmed = lines[i].trim();
1006 if is_list_item(trimmed) {
1007 // Collect the entire list item including continuation lines
1008 let list_start = i;
1009 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
1010 let marker_len = marker.len();
1011
1012 // MkDocs flavor requires at least 4 spaces for list continuation
1013 // after a blank line (multi-paragraph list items). For non-blank
1014 // continuation (lines directly following the marker line), use
1015 // the natural marker width so that 2-space indent is recognized.
1016 let min_continuation_indent = if ctx.flavor.requires_strict_list_indent() {
1017 marker_len.max(4)
1018 } else {
1019 marker_len
1020 };
1021 let content_continuation_indent = marker_len;
1022
// Classification of each line collected for the current list item (content, code
// block, fence, nested list, ...). The first entry is always the `Content` of the
// marker line; the collection loop below appends one entry per continuation line,
// and the block builder later groups consecutive entries into `Block`s.
#[derive(Clone)]
enum LineType {
    Content(String), // paragraph text with the list indentation removed
    CodeBlock(String, usize), // content and original indent (also used for fence marker lines)
    NestedListItem(String, usize), // full line content and original indent
    SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
    SnippetLine(String), // MkDocs Snippets delimiters (-8<-) that must stay on their own line
    DivMarker(String), // Quarto/Pandoc div markers (::: opening or closing)
    AdmonitionHeader(String, usize), // header text (e.g. "!!! note") and original indent
    AdmonitionContent(String, usize), // body content text and original indent
    Empty, // blank line that is followed by a sufficiently indented line of the same item
}
1036
1037 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1038 i += 1;
1039
1040 // Collect continuation lines using ctx.lines for metadata
1041 while i < lines.len() {
1042 let line_info = &ctx.lines[i];
1043
1044 // Use pre-computed is_blank from ctx
1045 if line_info.is_blank {
1046 // Empty line - check if next line is indented (part of list item)
1047 if i + 1 < lines.len() {
1048 let next_info = &ctx.lines[i + 1];
1049
1050 // Check if next line is indented enough to be continuation
1051 if !next_info.is_blank && next_info.indent >= min_continuation_indent {
1052 // This blank line is between paragraphs/blocks in the list item
1053 list_item_lines.push(LineType::Empty);
1054 i += 1;
1055 continue;
1056 }
1057 }
1058 // No indented line after blank, end of list item
1059 break;
1060 }
1061
1062 // Use pre-computed indent from ctx
1063 let indent = line_info.indent;
1064
1065 // Valid continuation must be indented at least content_continuation_indent.
1066 // For non-blank continuation, use marker_len (e.g. 2 for "- ").
1067 // MkDocs strict 4-space requirement applies only after blank lines.
1068 if indent >= content_continuation_indent {
1069 let trimmed = line_info.content(ctx.content).trim();
1070
1071 // Use pre-computed in_code_block from ctx
1072 if line_info.in_code_block {
1073 list_item_lines.push(LineType::CodeBlock(
1074 line_info.content(ctx.content)[indent..].to_string(),
1075 indent,
1076 ));
1077 i += 1;
1078 continue;
1079 }
1080
1081 // Check for MkDocs admonition lines inside list items.
1082 // The flavor detection marks these with in_admonition, so we
1083 // can classify them as admonition header or body content.
1084 // Code fence markers (``` or ~~~) within admonitions must be
1085 // classified as CodeBlock so the block builder preserves them
1086 // verbatim instead of merging them into paragraph text.
1087 if line_info.in_admonition {
1088 let raw_content = line_info.content(ctx.content);
1089 if mkdocs_admonitions::is_admonition_start(raw_content) {
1090 let header_text = raw_content[indent..].trim_end().to_string();
1091 list_item_lines.push(LineType::AdmonitionHeader(header_text, indent));
1092 } else {
1093 let body_text = raw_content[indent..].trim_end().to_string();
1094 if is_fence_marker(&body_text) {
1095 list_item_lines.push(LineType::CodeBlock(body_text, indent));
1096 } else {
1097 list_item_lines.push(LineType::AdmonitionContent(body_text, indent));
1098 }
1099 }
1100 i += 1;
1101 continue;
1102 }
1103
1104 // Check if this is a SIBLING list item (breaks parent)
1105 // Nested lists are indented >= marker_len and are PART of the parent item
1106 // Siblings are at indent < marker_len (at or before parent marker)
1107 if is_list_item(trimmed) && indent < marker_len {
1108 // This is a sibling item at same or higher level - end parent item
1109 break;
1110 }
1111
1112 // Check if this is a NESTED list item marker
1113 // Nested lists should be processed separately UNLESS they're part of a
1114 // multi-paragraph list item (indicated by a blank line before them OR
1115 // it's a continuation of an already-started nested list)
1116 if is_list_item(trimmed) && indent >= marker_len {
1117 // Check if there was a blank line before this (multi-paragraph context)
1118 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
1119
1120 // Check if we've already seen nested list content (another nested item)
1121 let has_nested_content = list_item_lines.iter().any(|line| {
1122 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
1123 || matches!(line, LineType::NestedListItem(_, _))
1124 });
1125
1126 if !has_blank_before && !has_nested_content {
1127 // Single-paragraph context with no prior nested items: starts a new item
1128 // End parent collection; nested list will be processed next
1129 break;
1130 }
1131 // else: multi-paragraph context or continuation of nested list, keep collecting
1132 // Mark this as a nested list item to preserve its structure
1133 list_item_lines.push(LineType::NestedListItem(
1134 line_info.content(ctx.content)[indent..].to_string(),
1135 indent,
1136 ));
1137 i += 1;
1138 continue;
1139 }
1140
1141 // Normal continuation vs indented code block.
1142 // Use min_continuation_indent for the threshold since
1143 // code blocks start 4 spaces beyond the expected content
1144 // level (which is min_continuation_indent for MkDocs).
1145 if indent <= min_continuation_indent + 3 {
1146 // Extract content (remove indentation and trailing whitespace)
1147 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1148 // See: https://github.com/rvben/rumdl/issues/76
1149 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1150
1151 // Check if this is a div marker (::: opening or closing)
1152 // These must be preserved on their own line, not merged into paragraphs
1153 if line_info.is_div_marker {
1154 list_item_lines.push(LineType::DivMarker(content));
1155 }
1156 // Check if this is a fence marker (opening or closing)
1157 // These should be treated as code block lines, not paragraph content
1158 else if is_fence_marker(&content) {
1159 list_item_lines.push(LineType::CodeBlock(content, indent));
1160 }
1161 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1162 else if is_semantic_line(&content) {
1163 list_item_lines.push(LineType::SemanticLine(content));
1164 }
1165 // Check if this is a snippet block delimiter (-8<- or --8<--)
1166 // These must be preserved on their own lines for MkDocs Snippets extension
1167 else if is_snippet_block_delimiter(&content) {
1168 list_item_lines.push(LineType::SnippetLine(content));
1169 } else {
1170 list_item_lines.push(LineType::Content(content));
1171 }
1172 i += 1;
1173 } else {
1174 // indent >= min_continuation_indent + 4: indented code block
1175 list_item_lines.push(LineType::CodeBlock(
1176 line_info.content(ctx.content)[indent..].to_string(),
1177 indent,
1178 ));
1179 i += 1;
1180 }
1181 } else {
1182 // Not indented enough, end of list item
1183 break;
1184 }
1185 }
1186
1187 // Determine the output continuation indent.
1188 // Normalize/Default modes canonicalize to min_continuation_indent
1189 // (fixing over-indented continuation). Semantic/SentencePerLine
1190 // modes preserve the user's actual indent since they only fix
1191 // line breaking, not indentation.
1192 let indent_size = match config.reflow_mode {
1193 ReflowMode::SemanticLineBreaks | ReflowMode::SentencePerLine => {
1194 // Find indent of the first plain text continuation line,
1195 // skipping the marker line (index 0), nested list items,
1196 // code blocks, and blank lines.
1197 list_item_lines
1198 .iter()
1199 .enumerate()
1200 .skip(1)
1201 .find_map(|(k, lt)| {
1202 if matches!(lt, LineType::Content(_)) {
1203 Some(ctx.lines[list_start + k].indent)
1204 } else {
1205 None
1206 }
1207 })
1208 .unwrap_or(min_continuation_indent)
1209 }
1210 _ => min_continuation_indent,
1211 };
1212 let expected_indent = " ".repeat(indent_size);
1213
// Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks).
// Each variant groups consecutive `LineType` entries that are handled as one unit.
#[derive(Clone)]
enum Block {
    Paragraph(Vec<String>), // consecutive plain content lines
    Code {
        lines: Vec<(String, usize)>, // (content, indent) pairs
        has_preceding_blank: bool, // Whether there was a blank line before this block
    },
    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
    SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
    DivMarker(String), // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
    Html {
        lines: Vec<String>, // HTML content preserved exactly as-is
        has_preceding_blank: bool, // Whether there was a blank line before this block
    },
    Admonition {
        header: String, // e.g. "!!! note" or "??? warning \"Title\""
        header_indent: usize, // original indent of the header line
        content_lines: Vec<(String, usize)>, // (text, original_indent) pairs for body lines; blank body lines are stored as ("", 0)
    },
}
1236
// HTML tag detection helpers
// Block-level HTML tags that should trigger HTML block detection.
// Names are stored in lower case; `is_block_html_opening_tag` lowercases the tag
// name it extracts before looking it up here, so matching is case-insensitive.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];
1282
1283 fn is_block_html_opening_tag(line: &str) -> Option<String> {
1284 let trimmed = line.trim();
1285
1286 // Check for HTML comments
1287 if trimmed.starts_with("<!--") {
1288 return Some("!--".to_string());
1289 }
1290
1291 // Check for opening tags
1292 if trimmed.starts_with('<') && !trimmed.starts_with("</") && !trimmed.starts_with("<!") {
1293 // Extract tag name from <tagname ...> or <tagname>
1294 let after_bracket = &trimmed[1..];
1295 if let Some(end) = after_bracket.find(|c: char| c.is_whitespace() || c == '>' || c == '/') {
1296 let tag_name = after_bracket[..end].to_lowercase();
1297
1298 // Only treat as block if it's a known block-level tag
1299 if BLOCK_LEVEL_TAGS.contains(&tag_name.as_str()) {
1300 return Some(tag_name);
1301 }
1302 }
1303 }
1304 None
1305 }
1306
/// Reports whether `line` closes the HTML construct identified by `tag_name`.
///
/// * For the comment pseudo tag `"!--"` the line closes it when it ends with `-->`.
/// * Otherwise the line (ignoring surrounding whitespace) must start with `</`,
///   optionally followed by whitespace, then the tag name. The name is compared in
///   full and ASCII case-insensitively, so `</pre>` does not close `p` and `</DIV>`
///   closes `div` (opening tag names are stored lowercased).
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    // Closing tags: </tagname>, </tagname ...>, </ tagname>, or a bare </tagname
    let rest = match trimmed.strip_prefix("</") {
        Some(rest) => rest.trim_start(),
        None => return false,
    };

    // The name runs up to whitespace, '>' or '/', or to the end of the line.
    let name_end = rest
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(rest.len());

    rest[..name_end].eq_ignore_ascii_case(tag_name)
}
1320
/// Reports whether the HTML construct opened on `line` is already complete on that
/// same line, so that no closing tag has to be awaited.
///
/// This is the case when the line (ignoring surrounding whitespace):
/// * ends with `/>` (XML-style self-closing tag),
/// * is a comment that is opened and closed on the line (`<!-- ... -->`), or
/// * starts with a complete void element such as `<hr>` or `<hr class="x">`, which
///   by definition never has a closing tag.
fn is_self_closing_tag(line: &str) -> bool {
    // HTML void elements: they have no content and no end tag.
    const VOID_ELEMENTS: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source", "track", "wbr",
    ];

    let trimmed = line.trim();
    if trimmed.ends_with("/>") {
        return true;
    }

    // One-line comment; the length check keeps "<!--" and "-->" from overlapping.
    if trimmed.starts_with("<!--") {
        return trimmed.len() >= 7 && trimmed.ends_with("-->");
    }

    match trimmed.strip_prefix('<') {
        Some(rest) => {
            let name_end = rest
                .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
                .unwrap_or(rest.len());
            let name = &rest[..name_end];
            // Require the tag itself to be finished on this line; a bare `<hr` whose
            // attributes continue on the next line is not complete yet.
            rest[name_end..].contains('>') && VOID_ELEMENTS.iter().any(|void| name.eq_ignore_ascii_case(void))
        }
        None => false,
    }
}
1325
1326 let mut blocks: Vec<Block> = Vec::new();
1327 let mut current_paragraph: Vec<String> = Vec::new();
1328 let mut current_code_block: Vec<(String, usize)> = Vec::new();
1329 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
1330 let mut current_html_block: Vec<String> = Vec::new();
1331 let mut html_tag_stack: Vec<String> = Vec::new();
1332 let mut in_code = false;
1333 let mut in_nested_list = false;
1334 let mut in_html_block = false;
1335 let mut had_preceding_blank = false; // Track if we just saw an empty line
1336 let mut code_block_has_preceding_blank = false; // Track blank before current code block
1337 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
1338
1339 // Track admonition context for block building
1340 let mut in_admonition_block = false;
1341 let mut admonition_header: Option<(String, usize)> = None; // (header_text, indent)
1342 let mut admonition_content: Vec<(String, usize)> = Vec::new();
1343
1344 // Flush any pending admonition block into `blocks`
1345 let flush_admonition = |blocks: &mut Vec<Block>,
1346 in_admonition: &mut bool,
1347 header: &mut Option<(String, usize)>,
1348 content: &mut Vec<(String, usize)>| {
1349 if *in_admonition {
1350 if let Some((h, hi)) = header.take() {
1351 blocks.push(Block::Admonition {
1352 header: h,
1353 header_indent: hi,
1354 content_lines: std::mem::take(content),
1355 });
1356 }
1357 *in_admonition = false;
1358 }
1359 };
1360
1361 for line in &list_item_lines {
1362 match line {
1363 LineType::Empty => {
1364 if in_admonition_block {
1365 // Blank lines inside admonitions separate paragraphs within the body
1366 admonition_content.push((String::new(), 0));
1367 } else if in_code {
1368 current_code_block.push((String::new(), 0));
1369 } else if in_nested_list {
1370 current_nested_list.push((String::new(), 0));
1371 } else if in_html_block {
1372 // Allow blank lines inside HTML blocks
1373 current_html_block.push(String::new());
1374 } else if !current_paragraph.is_empty() {
1375 blocks.push(Block::Paragraph(current_paragraph.clone()));
1376 current_paragraph.clear();
1377 }
1378 // Mark that we saw a blank line
1379 had_preceding_blank = true;
1380 }
1381 LineType::Content(content) => {
1382 flush_admonition(
1383 &mut blocks,
1384 &mut in_admonition_block,
1385 &mut admonition_header,
1386 &mut admonition_content,
1387 );
1388 // Check if we're currently in an HTML block
1389 if in_html_block {
1390 current_html_block.push(content.clone());
1391
1392 // Check if this line closes any open HTML tags
1393 if let Some(last_tag) = html_tag_stack.last() {
1394 if is_html_closing_tag(content, last_tag) {
1395 html_tag_stack.pop();
1396
1397 // If stack is empty, HTML block is complete
1398 if html_tag_stack.is_empty() {
1399 blocks.push(Block::Html {
1400 lines: current_html_block.clone(),
1401 has_preceding_blank: html_block_has_preceding_blank,
1402 });
1403 current_html_block.clear();
1404 in_html_block = false;
1405 }
1406 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1407 // Nested opening tag within HTML block
1408 if !is_self_closing_tag(content) {
1409 html_tag_stack.push(new_tag);
1410 }
1411 }
1412 }
1413 had_preceding_blank = false;
1414 } else {
1415 // Not in HTML block - check if this line starts one
1416 if let Some(tag_name) = is_block_html_opening_tag(content) {
1417 // Flush current paragraph before starting HTML block
1418 if in_code {
1419 blocks.push(Block::Code {
1420 lines: current_code_block.clone(),
1421 has_preceding_blank: code_block_has_preceding_blank,
1422 });
1423 current_code_block.clear();
1424 in_code = false;
1425 } else if in_nested_list {
1426 blocks.push(Block::NestedList(current_nested_list.clone()));
1427 current_nested_list.clear();
1428 in_nested_list = false;
1429 } else if !current_paragraph.is_empty() {
1430 blocks.push(Block::Paragraph(current_paragraph.clone()));
1431 current_paragraph.clear();
1432 }
1433
1434 // Start new HTML block
1435 in_html_block = true;
1436 html_block_has_preceding_blank = had_preceding_blank;
1437 current_html_block.push(content.clone());
1438
1439 // Check if it's self-closing or needs a closing tag
1440 if is_self_closing_tag(content) {
1441 // Self-closing tag - complete the HTML block immediately
1442 blocks.push(Block::Html {
1443 lines: current_html_block.clone(),
1444 has_preceding_blank: html_block_has_preceding_blank,
1445 });
1446 current_html_block.clear();
1447 in_html_block = false;
1448 } else {
1449 // Regular opening tag - push to stack
1450 html_tag_stack.push(tag_name);
1451 }
1452 } else {
1453 // Regular content line - add to paragraph
1454 if in_code {
1455 // Switching from code to content
1456 blocks.push(Block::Code {
1457 lines: current_code_block.clone(),
1458 has_preceding_blank: code_block_has_preceding_blank,
1459 });
1460 current_code_block.clear();
1461 in_code = false;
1462 } else if in_nested_list {
1463 // Switching from nested list to content
1464 blocks.push(Block::NestedList(current_nested_list.clone()));
1465 current_nested_list.clear();
1466 in_nested_list = false;
1467 }
1468 current_paragraph.push(content.clone());
1469 }
1470 had_preceding_blank = false; // Reset after content
1471 }
1472 }
1473 LineType::CodeBlock(content, indent) => {
1474 flush_admonition(
1475 &mut blocks,
1476 &mut in_admonition_block,
1477 &mut admonition_header,
1478 &mut admonition_content,
1479 );
1480 if in_nested_list {
1481 // Switching from nested list to code
1482 blocks.push(Block::NestedList(current_nested_list.clone()));
1483 current_nested_list.clear();
1484 in_nested_list = false;
1485 } else if in_html_block {
1486 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1487 blocks.push(Block::Html {
1488 lines: current_html_block.clone(),
1489 has_preceding_blank: html_block_has_preceding_blank,
1490 });
1491 current_html_block.clear();
1492 html_tag_stack.clear();
1493 in_html_block = false;
1494 }
1495 if !in_code {
1496 // Switching from content to code
1497 if !current_paragraph.is_empty() {
1498 blocks.push(Block::Paragraph(current_paragraph.clone()));
1499 current_paragraph.clear();
1500 }
1501 in_code = true;
1502 // Record whether there was a blank line before this code block
1503 code_block_has_preceding_blank = had_preceding_blank;
1504 }
1505 current_code_block.push((content.clone(), *indent));
1506 had_preceding_blank = false; // Reset after code
1507 }
1508 LineType::NestedListItem(content, indent) => {
1509 flush_admonition(
1510 &mut blocks,
1511 &mut in_admonition_block,
1512 &mut admonition_header,
1513 &mut admonition_content,
1514 );
1515 if in_code {
1516 // Switching from code to nested list
1517 blocks.push(Block::Code {
1518 lines: current_code_block.clone(),
1519 has_preceding_blank: code_block_has_preceding_blank,
1520 });
1521 current_code_block.clear();
1522 in_code = false;
1523 } else if in_html_block {
1524 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1525 blocks.push(Block::Html {
1526 lines: current_html_block.clone(),
1527 has_preceding_blank: html_block_has_preceding_blank,
1528 });
1529 current_html_block.clear();
1530 html_tag_stack.clear();
1531 in_html_block = false;
1532 }
1533 if !in_nested_list {
1534 // Switching from content to nested list
1535 if !current_paragraph.is_empty() {
1536 blocks.push(Block::Paragraph(current_paragraph.clone()));
1537 current_paragraph.clear();
1538 }
1539 in_nested_list = true;
1540 }
1541 current_nested_list.push((content.clone(), *indent));
1542 had_preceding_blank = false; // Reset after nested list
1543 }
1544 LineType::SemanticLine(content) => {
1545 // Semantic lines are standalone - flush any current block and add as separate block
1546 flush_admonition(
1547 &mut blocks,
1548 &mut in_admonition_block,
1549 &mut admonition_header,
1550 &mut admonition_content,
1551 );
1552 if in_code {
1553 blocks.push(Block::Code {
1554 lines: current_code_block.clone(),
1555 has_preceding_blank: code_block_has_preceding_blank,
1556 });
1557 current_code_block.clear();
1558 in_code = false;
1559 } else if in_nested_list {
1560 blocks.push(Block::NestedList(current_nested_list.clone()));
1561 current_nested_list.clear();
1562 in_nested_list = false;
1563 } else if in_html_block {
1564 blocks.push(Block::Html {
1565 lines: current_html_block.clone(),
1566 has_preceding_blank: html_block_has_preceding_blank,
1567 });
1568 current_html_block.clear();
1569 html_tag_stack.clear();
1570 in_html_block = false;
1571 } else if !current_paragraph.is_empty() {
1572 blocks.push(Block::Paragraph(current_paragraph.clone()));
1573 current_paragraph.clear();
1574 }
1575 // Add semantic line as its own block
1576 blocks.push(Block::SemanticLine(content.clone()));
1577 had_preceding_blank = false; // Reset after semantic line
1578 }
1579 LineType::SnippetLine(content) => {
1580 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1581 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1582 flush_admonition(
1583 &mut blocks,
1584 &mut in_admonition_block,
1585 &mut admonition_header,
1586 &mut admonition_content,
1587 );
1588 if in_code {
1589 blocks.push(Block::Code {
1590 lines: current_code_block.clone(),
1591 has_preceding_blank: code_block_has_preceding_blank,
1592 });
1593 current_code_block.clear();
1594 in_code = false;
1595 } else if in_nested_list {
1596 blocks.push(Block::NestedList(current_nested_list.clone()));
1597 current_nested_list.clear();
1598 in_nested_list = false;
1599 } else if in_html_block {
1600 blocks.push(Block::Html {
1601 lines: current_html_block.clone(),
1602 has_preceding_blank: html_block_has_preceding_blank,
1603 });
1604 current_html_block.clear();
1605 html_tag_stack.clear();
1606 in_html_block = false;
1607 } else if !current_paragraph.is_empty() {
1608 blocks.push(Block::Paragraph(current_paragraph.clone()));
1609 current_paragraph.clear();
1610 }
1611 // Add snippet line as its own block
1612 blocks.push(Block::SnippetLine(content.clone()));
1613 had_preceding_blank = false;
1614 }
1615 LineType::DivMarker(content) => {
1616 // Div markers (::: opening or closing) are standalone structural delimiters
1617 // Flush any current block and add as separate block
1618 flush_admonition(
1619 &mut blocks,
1620 &mut in_admonition_block,
1621 &mut admonition_header,
1622 &mut admonition_content,
1623 );
1624 if in_code {
1625 blocks.push(Block::Code {
1626 lines: current_code_block.clone(),
1627 has_preceding_blank: code_block_has_preceding_blank,
1628 });
1629 current_code_block.clear();
1630 in_code = false;
1631 } else if in_nested_list {
1632 blocks.push(Block::NestedList(current_nested_list.clone()));
1633 current_nested_list.clear();
1634 in_nested_list = false;
1635 } else if in_html_block {
1636 blocks.push(Block::Html {
1637 lines: current_html_block.clone(),
1638 has_preceding_blank: html_block_has_preceding_blank,
1639 });
1640 current_html_block.clear();
1641 html_tag_stack.clear();
1642 in_html_block = false;
1643 } else if !current_paragraph.is_empty() {
1644 blocks.push(Block::Paragraph(current_paragraph.clone()));
1645 current_paragraph.clear();
1646 }
1647 blocks.push(Block::DivMarker(content.clone()));
1648 had_preceding_blank = false;
1649 }
1650 LineType::AdmonitionHeader(header_text, indent) => {
1651 flush_admonition(
1652 &mut blocks,
1653 &mut in_admonition_block,
1654 &mut admonition_header,
1655 &mut admonition_content,
1656 );
1657 // Flush other current blocks
1658 if in_code {
1659 blocks.push(Block::Code {
1660 lines: current_code_block.clone(),
1661 has_preceding_blank: code_block_has_preceding_blank,
1662 });
1663 current_code_block.clear();
1664 in_code = false;
1665 } else if in_nested_list {
1666 blocks.push(Block::NestedList(current_nested_list.clone()));
1667 current_nested_list.clear();
1668 in_nested_list = false;
1669 } else if in_html_block {
1670 blocks.push(Block::Html {
1671 lines: current_html_block.clone(),
1672 has_preceding_blank: html_block_has_preceding_blank,
1673 });
1674 current_html_block.clear();
1675 html_tag_stack.clear();
1676 in_html_block = false;
1677 } else if !current_paragraph.is_empty() {
1678 blocks.push(Block::Paragraph(current_paragraph.clone()));
1679 current_paragraph.clear();
1680 }
1681 // Start new admonition block
1682 in_admonition_block = true;
1683 admonition_header = Some((header_text.clone(), *indent));
1684 admonition_content.clear();
1685 had_preceding_blank = false;
1686 }
1687 LineType::AdmonitionContent(content, indent) => {
1688 if in_admonition_block {
1689 // Add to current admonition body
1690 admonition_content.push((content.clone(), *indent));
1691 } else {
1692 // Admonition content without a header should not happen,
1693 // but treat it as regular content to avoid data loss
1694 current_paragraph.push(content.clone());
1695 }
1696 had_preceding_blank = false;
1697 }
1698 }
1699 }
1700
1701 // Push all remaining pending blocks independently
1702 flush_admonition(
1703 &mut blocks,
1704 &mut in_admonition_block,
1705 &mut admonition_header,
1706 &mut admonition_content,
1707 );
1708 if in_code && !current_code_block.is_empty() {
1709 blocks.push(Block::Code {
1710 lines: current_code_block,
1711 has_preceding_blank: code_block_has_preceding_blank,
1712 });
1713 }
1714 if in_nested_list && !current_nested_list.is_empty() {
1715 blocks.push(Block::NestedList(current_nested_list));
1716 }
1717 if in_html_block && !current_html_block.is_empty() {
1718 blocks.push(Block::Html {
1719 lines: current_html_block,
1720 has_preceding_blank: html_block_has_preceding_blank,
1721 });
1722 }
1723 if !current_paragraph.is_empty() {
1724 blocks.push(Block::Paragraph(current_paragraph));
1725 }
1726
1727 // Helper: check if a line (raw source or stripped content) is exempt
1728 // from line-length checks. Link reference definitions are always exempt;
1729 // standalone link/image lines are exempt when strict mode is off.
1730 // Also checks content after stripping list markers, since list item
1731 // continuation lines may contain link ref defs.
1732 let is_exempt_line = |raw_line: &str| -> bool {
1733 let trimmed = raw_line.trim();
1734 // Link reference definitions: always exempt
1735 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
1736 return true;
1737 }
1738 // Also check after stripping list markers (for list item content)
1739 if is_list_item(trimmed) {
1740 let (_, content) = extract_list_marker_and_content(trimmed);
1741 let content_trimmed = content.trim();
1742 if content_trimmed.starts_with('[')
1743 && content_trimmed.contains("]:")
1744 && LINK_REF_PATTERN.is_match(content_trimmed)
1745 {
1746 return true;
1747 }
1748 }
1749 // Standalone link/image lines: exempt when not strict
1750 if !config.strict && is_standalone_link_or_image_line(raw_line) {
1751 return true;
1752 }
1753 false
1754 };
1755
1756 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1757 // Exclude link reference definitions and standalone link lines from content
1758 // so they don't pollute combined_content or trigger false reflow.
1759 let content_lines: Vec<String> = list_item_lines
1760 .iter()
1761 .filter_map(|line| {
1762 if let LineType::Content(s) = line {
1763 if is_exempt_line(s) {
1764 return None;
1765 }
1766 Some(s.clone())
1767 } else {
1768 None
1769 }
1770 })
1771 .collect();
1772
1773 // Check if we need to reflow this list item
1774 // We check the combined content to see if it exceeds length limits
1775 let combined_content = content_lines.join(" ").trim().to_string();
1776
1777 // Helper to check if we should reflow in normalize mode
1778 let should_normalize = || {
1779 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1780 // DO normalize if it has plain text content that spans multiple lines
1781 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1782 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1783 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1784 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1785 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1786 let has_admonitions = blocks.iter().any(|b| matches!(b, Block::Admonition { .. }));
1787 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1788
1789 // If we have structural blocks but no paragraphs, don't normalize
1790 if (has_nested_lists
1791 || has_code_blocks
1792 || has_semantic_lines
1793 || has_snippet_lines
1794 || has_div_markers
1795 || has_admonitions)
1796 && !has_paragraphs
1797 {
1798 return false;
1799 }
1800
1801 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1802 if has_paragraphs {
1803 // Count only paragraphs that contain at least one non-exempt line.
1804 // Paragraphs consisting entirely of link ref defs or standalone links
1805 // should not trigger normalization.
1806 let paragraph_count = blocks
1807 .iter()
1808 .filter(|b| {
1809 if let Block::Paragraph(para_lines) = b {
1810 !para_lines.iter().all(|line| is_exempt_line(line))
1811 } else {
1812 false
1813 }
1814 })
1815 .count();
1816 if paragraph_count > 1 {
1817 // Multiple non-exempt paragraph blocks should be normalized
1818 return true;
1819 }
1820
1821 // Single paragraph block: normalize if it has multiple content lines
1822 if content_lines.len() > 1 {
1823 return true;
1824 }
1825 }
1826
1827 false
1828 };
1829
1830 let needs_reflow = match config.reflow_mode {
1831 ReflowMode::Normalize => {
1832 // Only reflow if:
1833 // 1. Any non-exempt paragraph, when joined, exceeds the limit, OR
1834 // 2. Any admonition content line exceeds the limit, OR
1835 // 3. The list item should be normalized (has multi-line plain text)
1836 let any_paragraph_exceeds = blocks.iter().any(|block| match block {
1837 Block::Paragraph(para_lines) => {
1838 if para_lines.iter().all(|line| is_exempt_line(line)) {
1839 return false;
1840 }
1841 let joined = para_lines.join(" ");
1842 let with_marker = format!("{}{}", " ".repeat(indent_size), joined.trim());
1843 self.calculate_effective_length(&with_marker) > config.line_length.get()
1844 }
1845 Block::Admonition {
1846 content_lines,
1847 header_indent,
1848 ..
1849 } => content_lines.iter().any(|(content, indent)| {
1850 if content.is_empty() {
1851 return false;
1852 }
1853 let with_indent = format!("{}{}", " ".repeat(*indent.max(header_indent)), content);
1854 self.calculate_effective_length(&with_indent) > config.line_length.get()
1855 }),
1856 _ => false,
1857 });
1858 if any_paragraph_exceeds {
1859 true
1860 } else {
1861 should_normalize()
1862 }
1863 }
1864 ReflowMode::SentencePerLine => {
1865 // Check if list item has multiple sentences
1866 let sentences = split_into_sentences(&combined_content);
1867 sentences.len() > 1
1868 }
1869 ReflowMode::SemanticLineBreaks => {
1870 let sentences = split_into_sentences(&combined_content);
1871 sentences.len() > 1
1872 || (list_start..i).any(|line_idx| {
1873 let line = lines[line_idx];
1874 let trimmed = line.trim();
1875 if trimmed.is_empty() || is_exempt_line(line) {
1876 return false;
1877 }
1878 self.calculate_effective_length(line) > config.line_length.get()
1879 })
1880 }
1881 ReflowMode::Default => {
1882 // In default mode, only reflow if any individual non-exempt line exceeds limit
1883 (list_start..i).any(|line_idx| {
1884 let line = lines[line_idx];
1885 let trimmed = line.trim();
1886 // Skip blank lines and exempt lines
1887 if trimmed.is_empty() || is_exempt_line(line) {
1888 return false;
1889 }
1890 self.calculate_effective_length(line) > config.line_length.get()
1891 })
1892 }
1893 };
1894
1895 if needs_reflow {
1896 let start_range = line_index.whole_line_range(list_start + 1);
1897 let end_line = i - 1;
1898 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1899 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1900 } else {
1901 line_index.whole_line_range(end_line + 1)
1902 };
1903 let byte_range = start_range.start..end_range.end;
1904
1905 // Reflow each block (paragraphs only, preserve code blocks)
1906 // When line_length = 0 (no limit), use a very large value for reflow
1907 let reflow_line_length = if config.line_length.is_unlimited() {
1908 usize::MAX
1909 } else {
1910 config.line_length.get().saturating_sub(indent_size).max(1)
1911 };
1912 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1913 line_length: reflow_line_length,
1914 break_on_sentences: true,
1915 preserve_breaks: false,
1916 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1917 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1918 abbreviations: config.abbreviations_for_reflow(),
1919 length_mode: self.reflow_length_mode(),
1920 attr_lists: ctx.flavor.supports_attr_lists(),
1921 };
1922
1923 let mut result: Vec<String> = Vec::new();
1924 let mut is_first_block = true;
1925
1926 for (block_idx, block) in blocks.iter().enumerate() {
1927 match block {
1928 Block::Paragraph(para_lines) => {
1929 // If every line in this paragraph is exempt (link ref defs,
1930 // standalone links), preserve the paragraph verbatim instead
1931 // of reflowing it. Reflowing would corrupt link ref defs.
1932 let all_exempt = para_lines.iter().all(|line| is_exempt_line(line));
1933
1934 if all_exempt {
1935 for (idx, line) in para_lines.iter().enumerate() {
1936 if is_first_block && idx == 0 {
1937 result.push(format!("{marker}{line}"));
1938 is_first_block = false;
1939 } else {
1940 result.push(format!("{expected_indent}{line}"));
1941 }
1942 }
1943 } else {
1944 // Split the paragraph into segments at hard break boundaries
1945 // Each segment can be reflowed independently
1946 let segments = split_into_segments(para_lines);
1947
1948 for (segment_idx, segment) in segments.iter().enumerate() {
1949 // Check if this segment ends with a hard break and what type
1950 let hard_break_type = segment.last().and_then(|line| {
1951 let line = line.strip_suffix('\r').unwrap_or(line);
1952 if line.ends_with('\\') {
1953 Some("\\")
1954 } else if line.ends_with(" ") {
1955 Some(" ")
1956 } else {
1957 None
1958 }
1959 });
1960
1961 // Join and reflow the segment (removing the hard break marker for processing)
1962 let segment_for_reflow: Vec<String> = segment
1963 .iter()
1964 .map(|line| {
1965 // Strip hard break marker (2 spaces or backslash) for reflow processing
1966 if line.ends_with('\\') {
1967 line[..line.len() - 1].trim_end().to_string()
1968 } else if line.ends_with(" ") {
1969 line[..line.len() - 2].trim_end().to_string()
1970 } else {
1971 line.clone()
1972 }
1973 })
1974 .collect();
1975
1976 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1977 if !segment_text.is_empty() {
1978 let reflowed =
1979 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1980
1981 if is_first_block && segment_idx == 0 {
1982 // First segment of first block starts with marker
1983 result.push(format!("{marker}{}", reflowed[0]));
1984 for line in reflowed.iter().skip(1) {
1985 result.push(format!("{expected_indent}{line}"));
1986 }
1987 is_first_block = false;
1988 } else {
1989 // Subsequent segments
1990 for line in reflowed {
1991 result.push(format!("{expected_indent}{line}"));
1992 }
1993 }
1994
1995 // If this segment had a hard break, add it back to the last line
1996 // Preserve the original hard break format (backslash or two spaces)
1997 if let Some(break_marker) = hard_break_type
1998 && let Some(last_line) = result.last_mut()
1999 {
2000 last_line.push_str(break_marker);
2001 }
2002 }
2003 }
2004 }
2005
2006 // Add blank line after paragraph block if there's a next block.
2007 // Check if next block is a code block that doesn't want a preceding blank.
2008 // Also don't add blank lines before snippet lines (they should stay tight).
2009 // Only add if not already ending with one (avoids double blanks).
2010 if block_idx < blocks.len() - 1 {
2011 let next_block = &blocks[block_idx + 1];
2012 let should_add_blank = match next_block {
2013 Block::Code {
2014 has_preceding_blank, ..
2015 } => *has_preceding_blank,
2016 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2017 _ => true, // For all other blocks, add blank line
2018 };
2019 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2020 {
2021 result.push(String::new());
2022 }
2023 }
2024 }
2025 Block::Code {
2026 lines: code_lines,
2027 has_preceding_blank: _,
2028 } => {
2029 // Preserve code blocks as-is with original indentation
2030 // NOTE: Blank line before code block is handled by the previous block
2031 // (see paragraph block's logic above)
2032
2033 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
2034 if is_first_block && idx == 0 {
2035 // First line of first block gets marker
2036 result.push(format!(
2037 "{marker}{}",
2038 " ".repeat(orig_indent - marker_len) + content
2039 ));
2040 is_first_block = false;
2041 } else if content.is_empty() {
2042 result.push(String::new());
2043 } else {
2044 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
2045 }
2046 }
2047 }
2048 Block::NestedList(nested_items) => {
2049 // Preserve nested list items as-is with original indentation.
2050 // Only add blank before if not already ending with one (avoids
2051 // double blanks when the preceding block already added one).
2052 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
2053 result.push(String::new());
2054 }
2055
2056 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
2057 if is_first_block && idx == 0 {
2058 // First line of first block gets marker
2059 result.push(format!(
2060 "{marker}{}",
2061 " ".repeat(orig_indent - marker_len) + content
2062 ));
2063 is_first_block = false;
2064 } else if content.is_empty() {
2065 result.push(String::new());
2066 } else {
2067 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
2068 }
2069 }
2070
2071 // Add blank line after nested list if there's a next block.
2072 // Only add if not already ending with one (avoids double blanks
2073 // when the last nested item was already a blank line).
2074 if block_idx < blocks.len() - 1 {
2075 let next_block = &blocks[block_idx + 1];
2076 let should_add_blank = match next_block {
2077 Block::Code {
2078 has_preceding_blank, ..
2079 } => *has_preceding_blank,
2080 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2081 _ => true, // For all other blocks, add blank line
2082 };
2083 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2084 {
2085 result.push(String::new());
2086 }
2087 }
2088 }
2089 Block::SemanticLine(content) => {
2090 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
2091 // Only add blank before if not already ending with one.
2092 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
2093 result.push(String::new());
2094 }
2095
2096 if is_first_block {
2097 // First block starts with marker
2098 result.push(format!("{marker}{content}"));
2099 is_first_block = false;
2100 } else {
2101 // Subsequent blocks use expected indent
2102 result.push(format!("{expected_indent}{content}"));
2103 }
2104
2105 // Add blank line after semantic line if there's a next block.
2106 // Only add if not already ending with one.
2107 if block_idx < blocks.len() - 1 {
2108 let next_block = &blocks[block_idx + 1];
2109 let should_add_blank = match next_block {
2110 Block::Code {
2111 has_preceding_blank, ..
2112 } => *has_preceding_blank,
2113 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2114 _ => true, // For all other blocks, add blank line
2115 };
2116 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2117 {
2118 result.push(String::new());
2119 }
2120 }
2121 }
2122 Block::SnippetLine(content) => {
2123 // Preserve snippet delimiters (-8<-) as-is on their own line
2124 // Unlike semantic lines, snippet lines don't add extra blank lines
2125 if is_first_block {
2126 // First block starts with marker
2127 result.push(format!("{marker}{content}"));
2128 is_first_block = false;
2129 } else {
2130 // Subsequent blocks use expected indent
2131 result.push(format!("{expected_indent}{content}"));
2132 }
2133 // No blank lines added before or after snippet delimiters
2134 }
2135 Block::DivMarker(content) => {
2136 // Preserve div markers (::: opening or closing) as-is on their own line
2137 if is_first_block {
2138 result.push(format!("{marker}{content}"));
2139 is_first_block = false;
2140 } else {
2141 result.push(format!("{expected_indent}{content}"));
2142 }
2143 }
2144 Block::Html {
2145 lines: html_lines,
2146 has_preceding_blank: _,
2147 } => {
2148 // Preserve HTML blocks exactly as-is with original indentation
2149 // NOTE: Blank line before HTML block is handled by the previous block
2150
2151 for (idx, line) in html_lines.iter().enumerate() {
2152 if is_first_block && idx == 0 {
2153 // First line of first block gets marker
2154 result.push(format!("{marker}{line}"));
2155 is_first_block = false;
2156 } else if line.is_empty() {
2157 // Preserve blank lines inside HTML blocks
2158 result.push(String::new());
2159 } else {
2160 // Preserve lines with their original content (already includes indentation)
2161 result.push(format!("{expected_indent}{line}"));
2162 }
2163 }
2164
2165 // Add blank line after HTML block if there's a next block.
2166 // Only add if not already ending with one (avoids double blanks
2167 // when the HTML block itself contained a trailing blank line).
2168 if block_idx < blocks.len() - 1 {
2169 let next_block = &blocks[block_idx + 1];
2170 let should_add_blank = match next_block {
2171 Block::Code {
2172 has_preceding_blank, ..
2173 } => *has_preceding_blank,
2174 Block::Html {
2175 has_preceding_blank, ..
2176 } => *has_preceding_blank,
2177 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2178 _ => true, // For all other blocks, add blank line
2179 };
2180 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2181 {
2182 result.push(String::new());
2183 }
2184 }
2185 }
2186 Block::Admonition {
2187 header,
2188 header_indent,
2189 content_lines: admon_lines,
2190 } => {
2191 // Reconstruct admonition block with header at original indent
2192 // and body content reflowed to fit within the line length limit
2193
2194 // Add blank line before admonition if not first block
2195 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
2196 result.push(String::new());
2197 }
2198
2199 // Output the header at its original indent
2200 let header_indent_str = " ".repeat(*header_indent);
2201 if is_first_block {
2202 result.push(format!(
2203 "{marker}{}",
2204 " ".repeat(header_indent.saturating_sub(marker_len)) + header
2205 ));
2206 is_first_block = false;
2207 } else {
2208 result.push(format!("{header_indent_str}{header}"));
2209 }
2210
2211 // Derive body indent from the first non-empty content line's
2212 // stored indent, falling back to header_indent + 4 for
2213 // empty-body admonitions
2214 let body_indent = admon_lines
2215 .iter()
2216 .find(|(content, _)| !content.is_empty())
2217 .map(|(_, indent)| *indent)
2218 .unwrap_or(header_indent + 4);
2219 let body_indent_str = " ".repeat(body_indent);
2220
2221 // Collect body content into paragraphs separated by blank lines
2222 let mut body_paragraphs: Vec<Vec<String>> = Vec::new();
2223 let mut current_para: Vec<String> = Vec::new();
2224
2225 for (content, _orig_indent) in admon_lines {
2226 if content.is_empty() {
2227 if !current_para.is_empty() {
2228 body_paragraphs.push(current_para.clone());
2229 current_para.clear();
2230 }
2231 } else {
2232 current_para.push(content.clone());
2233 }
2234 }
2235 if !current_para.is_empty() {
2236 body_paragraphs.push(current_para);
2237 }
2238
2239 // Reflow each paragraph in the body
2240 for paragraph in &body_paragraphs {
2241 // Add blank line before each paragraph (including the first, after the header)
2242 result.push(String::new());
2243
2244 let paragraph_text = paragraph.join(" ").trim().to_string();
2245 if paragraph_text.is_empty() {
2246 continue;
2247 }
2248
2249 // Reflow with adjusted line length
2250 let admon_reflow_length = if config.line_length.is_unlimited() {
2251 usize::MAX
2252 } else {
2253 config.line_length.get().saturating_sub(body_indent).max(1)
2254 };
2255
2256 let admon_reflow_options = crate::utils::text_reflow::ReflowOptions {
2257 line_length: admon_reflow_length,
2258 break_on_sentences: true,
2259 preserve_breaks: false,
2260 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2261 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2262 abbreviations: config.abbreviations_for_reflow(),
2263 length_mode: self.reflow_length_mode(),
2264 attr_lists: ctx.flavor.supports_attr_lists(),
2265 };
2266
2267 let reflowed =
2268 crate::utils::text_reflow::reflow_line(¶graph_text, &admon_reflow_options);
2269 for line in &reflowed {
2270 result.push(format!("{body_indent_str}{line}"));
2271 }
2272 }
2273
2274 // Add blank line after admonition if there's a next block
2275 if block_idx < blocks.len() - 1 {
2276 let next_block = &blocks[block_idx + 1];
2277 let should_add_blank = match next_block {
2278 Block::Code {
2279 has_preceding_blank, ..
2280 } => *has_preceding_blank,
2281 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2282 _ => true,
2283 };
2284 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2285 {
2286 result.push(String::new());
2287 }
2288 }
2289 }
2290 }
2291 }
2292
2293 let reflowed_text = result.join(line_ending);
2294
2295 // Preserve trailing newline
2296 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2297 format!("{reflowed_text}{line_ending}")
2298 } else {
2299 reflowed_text
2300 };
2301
2302 // Get the original text to compare
2303 let original_text = &ctx.content[byte_range.clone()];
2304
2305 // Only generate a warning if the replacement is different from the original
2306 if original_text != replacement {
2307 // Generate an appropriate message based on why reflow is needed
2308 let message = match config.reflow_mode {
2309 ReflowMode::SentencePerLine => {
2310 let num_sentences = split_into_sentences(&combined_content).len();
2311 let num_lines = content_lines.len();
2312 if num_lines == 1 {
2313 // Single line with multiple sentences
2314 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2315 } else {
2316 // Multiple lines - could be split sentences or mixed
2317 format!(
2318 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
2319 )
2320 }
2321 }
2322 ReflowMode::SemanticLineBreaks => {
2323 let num_sentences = split_into_sentences(&combined_content).len();
2324 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
2325 }
2326 ReflowMode::Normalize => {
2327 // Find the longest non-exempt paragraph when joined
2328 let max_para_length = blocks
2329 .iter()
2330 .filter_map(|block| {
2331 if let Block::Paragraph(para_lines) = block {
2332 if para_lines.iter().all(|line| is_exempt_line(line)) {
2333 return None;
2334 }
2335 let joined = para_lines.join(" ");
2336 let with_indent = format!("{}{}", " ".repeat(indent_size), joined.trim());
2337 Some(self.calculate_effective_length(&with_indent))
2338 } else {
2339 None
2340 }
2341 })
2342 .max()
2343 .unwrap_or(0);
2344 if max_para_length > config.line_length.get() {
2345 format!(
2346 "Line length {} exceeds {} characters",
2347 max_para_length,
2348 config.line_length.get()
2349 )
2350 } else {
2351 "Multi-line content can be normalized".to_string()
2352 }
2353 }
2354 ReflowMode::Default => {
2355 // Report the actual longest non-exempt line, not the combined content
2356 let max_length = (list_start..i)
2357 .filter(|&line_idx| {
2358 let line = lines[line_idx];
2359 let trimmed = line.trim();
2360 !trimmed.is_empty() && !is_exempt_line(line)
2361 })
2362 .map(|line_idx| self.calculate_effective_length(lines[line_idx]))
2363 .max()
2364 .unwrap_or(0);
2365 format!(
2366 "Line length {} exceeds {} characters",
2367 max_length,
2368 config.line_length.get()
2369 )
2370 }
2371 };
2372
2373 warnings.push(LintWarning {
2374 rule_name: Some(self.name().to_string()),
2375 message,
2376 line: list_start + 1,
2377 column: 1,
2378 end_line: end_line + 1,
2379 end_column: lines[end_line].len() + 1,
2380 severity: Severity::Warning,
2381 fix: Some(crate::rule::Fix {
2382 range: byte_range,
2383 replacement,
2384 }),
2385 });
2386 }
2387 }
2388 continue;
2389 }
2390
2391 // Found start of a paragraph - collect all lines in it
2392 let paragraph_start = i;
2393 let mut paragraph_lines = vec![lines[i]];
2394 i += 1;
2395
2396 while i < lines.len() {
2397 let next_line = lines[i];
2398 let next_line_num = i + 1;
2399 let next_trimmed = next_line.trim();
2400
2401 // Stop at paragraph boundaries
2402 if next_trimmed.is_empty()
2403 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
2404 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
2405 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
2406 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
2407 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
2408 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
2409 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
2410 || ctx
2411 .line_info(next_line_num)
2412 .is_some_and(|info| info.in_mkdocs_container())
2413 || (next_line_num > 0
2414 && next_line_num <= ctx.lines.len()
2415 && ctx.lines[next_line_num - 1].blockquote.is_some())
2416 || next_trimmed.starts_with('#')
2417 || TableUtils::is_potential_table_row(next_line)
2418 || is_list_item(next_trimmed)
2419 || is_horizontal_rule(next_trimmed)
2420 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
2421 || is_template_directive_only(next_line)
2422 || is_standalone_attr_list(next_line)
2423 || is_snippet_block_delimiter(next_line)
2424 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
2425 {
2426 break;
2427 }
2428
2429 // Check if the previous line ends with a hard break (2+ spaces or backslash)
2430 if i > 0 && has_hard_break(lines[i - 1]) {
2431 // Don't include lines after hard breaks in the same paragraph
2432 break;
2433 }
2434
2435 paragraph_lines.push(next_line);
2436 i += 1;
2437 }
2438
2439 // Combine paragraph lines into a single string for processing
2440 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
2441 let paragraph_text = paragraph_lines.join(" ");
2442
2443 // Skip reflowing if this paragraph contains definition list items
2444 // Definition lists are multi-line structures that should not be joined
2445 let contains_definition_list = paragraph_lines
2446 .iter()
2447 .any(|line| crate::utils::is_definition_list_item(line));
2448
2449 if contains_definition_list {
2450 // Don't reflow definition lists - skip this paragraph
2451 i = paragraph_start + paragraph_lines.len();
2452 continue;
2453 }
2454
2455 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2456 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2457 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2458
2459 if contains_snippets {
2460 // Don't reflow Snippets blocks - skip this paragraph
2461 i = paragraph_start + paragraph_lines.len();
2462 continue;
2463 }
2464
2465 // Check if this paragraph needs reflowing
2466 let needs_reflow = match config.reflow_mode {
2467 ReflowMode::Normalize => {
2468 // In normalize mode, reflow multi-line paragraphs
2469 paragraph_lines.len() > 1
2470 }
2471 ReflowMode::SentencePerLine => {
2472 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
2473 // Note: we check the joined text because sentences can span multiple lines
2474 let sentences = split_into_sentences(¶graph_text);
2475
2476 // Always reflow if multiple sentences on one line
2477 if sentences.len() > 1 {
2478 true
2479 } else if paragraph_lines.len() > 1 {
2480 // For single-sentence paragraphs spanning multiple lines:
2481 // Reflow if they COULD fit on one line (respecting line-length constraint)
2482 if config.line_length.is_unlimited() {
2483 // No line-length constraint - always join single sentences
2484 true
2485 } else {
2486 // Only join if it fits within line-length
2487 let effective_length = self.calculate_effective_length(¶graph_text);
2488 effective_length <= config.line_length.get()
2489 }
2490 } else {
2491 false
2492 }
2493 }
2494 ReflowMode::SemanticLineBreaks => {
2495 let sentences = split_into_sentences(¶graph_text);
2496 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
2497 sentences.len() > 1
2498 || paragraph_lines.len() > 1
2499 || paragraph_lines
2500 .iter()
2501 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2502 }
2503 ReflowMode::Default => {
2504 // In default mode, only reflow if lines exceed limit
2505 paragraph_lines
2506 .iter()
2507 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2508 }
2509 };
2510
2511 if needs_reflow {
2512 // Calculate byte range for this paragraph
2513 // Use whole_line_range for each line and combine
2514 let start_range = line_index.whole_line_range(paragraph_start + 1);
2515 let end_line = paragraph_start + paragraph_lines.len() - 1;
2516
2517 // For the last line, we want to preserve any trailing newline
2518 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2519 // Last line without trailing newline - use line_text_range
2520 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2521 } else {
2522 // Not the last line or has trailing newline - use whole_line_range
2523 line_index.whole_line_range(end_line + 1)
2524 };
2525
2526 let byte_range = start_range.start..end_range.end;
2527
2528 // Check if the paragraph ends with a hard break and what type
2529 let hard_break_type = paragraph_lines.last().and_then(|line| {
2530 let line = line.strip_suffix('\r').unwrap_or(line);
2531 if line.ends_with('\\') {
2532 Some("\\")
2533 } else if line.ends_with(" ") {
2534 Some(" ")
2535 } else {
2536 None
2537 }
2538 });
2539
2540 // Reflow the paragraph
2541 // When line_length = 0 (no limit), use a very large value for reflow
2542 let reflow_line_length = if config.line_length.is_unlimited() {
2543 usize::MAX
2544 } else {
2545 config.line_length.get()
2546 };
2547 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2548 line_length: reflow_line_length,
2549 break_on_sentences: true,
2550 preserve_breaks: false,
2551 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2552 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2553 abbreviations: config.abbreviations_for_reflow(),
2554 length_mode: self.reflow_length_mode(),
2555 attr_lists: ctx.flavor.supports_attr_lists(),
2556 };
2557 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
2558
2559 // If the original paragraph ended with a hard break, preserve it
2560 // Preserve the original hard break format (backslash or two spaces)
2561 if let Some(break_marker) = hard_break_type
2562 && !reflowed.is_empty()
2563 {
2564 let last_idx = reflowed.len() - 1;
2565 if !has_hard_break(&reflowed[last_idx]) {
2566 reflowed[last_idx].push_str(break_marker);
2567 }
2568 }
2569
2570 let reflowed_text = reflowed.join(line_ending);
2571
2572 // Preserve trailing newline if the original paragraph had one
2573 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2574 format!("{reflowed_text}{line_ending}")
2575 } else {
2576 reflowed_text
2577 };
2578
2579 // Get the original text to compare
2580 let original_text = &ctx.content[byte_range.clone()];
2581
2582 // Only generate a warning if the replacement is different from the original
2583 if original_text != replacement {
2584 // Create warning with actual fix
2585 // In default mode, report the specific line that violates
2586 // In normalize mode, report the whole paragraph
2587 // In sentence-per-line mode, report the entire paragraph
2588 let (warning_line, warning_end_line) = match config.reflow_mode {
2589 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
2590 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
2591 // Highlight the entire paragraph that needs reformatting
2592 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
2593 }
2594 ReflowMode::Default => {
2595 // Find the first line that exceeds the limit
2596 let mut violating_line = paragraph_start;
2597 for (idx, line) in paragraph_lines.iter().enumerate() {
2598 if self.calculate_effective_length(line) > config.line_length.get() {
2599 violating_line = paragraph_start + idx;
2600 break;
2601 }
2602 }
2603 (violating_line + 1, violating_line + 1)
2604 }
2605 };
2606
2607 warnings.push(LintWarning {
2608 rule_name: Some(self.name().to_string()),
2609 message: match config.reflow_mode {
2610 ReflowMode::Normalize => format!(
2611 "Paragraph could be normalized to use line length of {} characters",
2612 config.line_length.get()
2613 ),
2614 ReflowMode::SentencePerLine => {
2615 let num_sentences = split_into_sentences(¶graph_text).len();
2616 if paragraph_lines.len() == 1 {
2617 // Single line with multiple sentences
2618 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2619 } else {
2620 let num_lines = paragraph_lines.len();
2621 // Multiple lines - could be split sentences or mixed
2622 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
2623 }
2624 },
2625 ReflowMode::SemanticLineBreaks => {
2626 let num_sentences = split_into_sentences(¶graph_text).len();
2627 format!(
2628 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
2629 )
2630 },
2631 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
2632 },
2633 line: warning_line,
2634 column: 1,
2635 end_line: warning_end_line,
2636 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
2637 severity: Severity::Warning,
2638 fix: Some(crate::rule::Fix {
2639 range: byte_range,
2640 replacement,
2641 }),
2642 });
2643 }
2644 }
2645 }
2646
2647 warnings
2648 }
2649
2650 /// Calculate string length based on the configured length mode
2651 fn calculate_string_length(&self, s: &str) -> usize {
2652 match self.config.length_mode {
2653 LengthMode::Chars => s.chars().count(),
2654 LengthMode::Visual => s.width(),
2655 LengthMode::Bytes => s.len(),
2656 }
2657 }
2658
2659 /// Calculate effective line length
2660 ///
2661 /// Returns the actual display length of the line using the configured length mode.
2662 fn calculate_effective_length(&self, line: &str) -> usize {
2663 self.calculate_string_length(line)
2664 }
2665
2666 /// Calculate line length with inline link/image URLs removed.
2667 ///
2668 /// For each inline link `[text](url)` or image `` on the line,
2669 /// computes the "savings" from removing the URL portion (keeping only `[text]`
2670 /// or `![alt]`). Returns `effective_length - total_savings`.
2671 ///
2672 /// Handles nested constructs (e.g., `[](url)`) by only counting the
2673 /// outermost construct to avoid double-counting.
2674 fn calculate_text_only_length(
2675 &self,
2676 effective_length: usize,
2677 line_number: usize,
2678 ctx: &crate::lint_context::LintContext,
2679 ) -> usize {
2680 let line_range = ctx.line_index.line_content_range(line_number);
2681 let line_byte_end = line_range.end;
2682
2683 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
2684 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
2685
2686 for link in &ctx.links {
2687 if link.line != line_number || link.is_reference {
2688 continue;
2689 }
2690 if !matches!(link.link_type, LinkType::Inline) {
2691 continue;
2692 }
2693 // Skip cross-line links
2694 if link.byte_end > line_byte_end {
2695 continue;
2696 }
2697 // `[text]` in configured length mode
2698 let text_only_len = 2 + self.calculate_string_length(&link.text);
2699 constructs.push((link.byte_offset, link.byte_end, text_only_len));
2700 }
2701
2702 for image in &ctx.images {
2703 if image.line != line_number || image.is_reference {
2704 continue;
2705 }
2706 if !matches!(image.link_type, LinkType::Inline) {
2707 continue;
2708 }
2709 // Skip cross-line images
2710 if image.byte_end > line_byte_end {
2711 continue;
2712 }
2713 // `![alt]` in configured length mode
2714 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
2715 constructs.push((image.byte_offset, image.byte_end, text_only_len));
2716 }
2717
2718 if constructs.is_empty() {
2719 return effective_length;
2720 }
2721
2722 // Sort by byte offset to handle overlapping/nested constructs
2723 constructs.sort_by_key(|&(start, _, _)| start);
2724
2725 let mut total_savings: usize = 0;
2726 let mut last_end: usize = 0;
2727
2728 for (start, end, text_only_len) in &constructs {
2729 // Skip constructs nested inside a previously counted one
2730 if *start < last_end {
2731 continue;
2732 }
2733 // Full construct length in configured length mode
2734 let full_source = &ctx.content[*start..*end];
2735 let full_len = self.calculate_string_length(full_source);
2736 total_savings += full_len.saturating_sub(*text_only_len);
2737 last_end = *end;
2738 }
2739
2740 effective_length.saturating_sub(total_savings)
2741 }
2742}