rumdl_lib/rules/md013_line_length/mod.rs
1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{
15 BlockquoteLineData, ReflowLengthMode, blockquote_continuation_style, dominant_blockquote_prefix,
16 reflow_blockquote_content, split_into_sentences,
17};
18use pulldown_cmark::LinkType;
19use toml;
20
21mod helpers;
22pub mod md013_config;
23use crate::utils::is_template_directive_only;
24use helpers::{
25 extract_list_marker_and_content, has_hard_break, is_github_alert_marker, is_horizontal_rule, is_list_item,
26 is_standalone_link_or_image_line, split_into_segments, trim_preserving_hard_break,
27};
28pub use md013_config::MD013Config;
29use md013_config::{LengthMode, ReflowMode};
30
31#[cfg(test)]
32mod tests;
33use unicode_width::UnicodeWidthStr;
34
/// Lint rule MD013 (line length).
///
/// Holds the [`MD013Config`] that the rule's checks and fixes read their settings from.
#[derive(Clone, Default)]
pub struct MD013LineLength {
    /// Settings for this rule instance (inline overrides are layered on a clone of it in `check`).
    pub(crate) config: MD013Config,
}
39
/// Blockquote paragraph line collected for reflow, with original line index for range computation.
struct CollectedBlockquoteLine {
    /// Zero-based index of the line within the document's line slice.
    line_idx: usize,
    /// Line content (plus blockquote prefix for explicit lines) handed to the reflow helpers.
    data: BlockquoteLineData,
}
45
46impl MD013LineLength {
47 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
48 Self {
49 config: MD013Config {
50 line_length: crate::types::LineLength::new(line_length),
51 code_blocks,
52 tables,
53 headings,
54 paragraphs: true, // Default to true for backwards compatibility
55 strict,
56 reflow: false,
57 reflow_mode: ReflowMode::default(),
58 length_mode: LengthMode::default(),
59 abbreviations: Vec::new(),
60 },
61 }
62 }
63
/// Creates the rule directly from an already-built [`MD013Config`].
pub fn from_config_struct(config: MD013Config) -> Self {
    Self { config }
}
67
68 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
69 fn reflow_length_mode(&self) -> ReflowLengthMode {
70 match self.config.length_mode {
71 LengthMode::Chars => ReflowLengthMode::Chars,
72 LengthMode::Visual => ReflowLengthMode::Visual,
73 LengthMode::Bytes => ReflowLengthMode::Bytes,
74 }
75 }
76
77 fn should_ignore_line(
78 &self,
79 line: &str,
80 _lines: &[&str],
81 current_line: usize,
82 ctx: &crate::lint_context::LintContext,
83 ) -> bool {
84 if self.config.strict {
85 return false;
86 }
87
88 // Quick check for common patterns before expensive regex
89 let trimmed = line.trim();
90
91 // Only skip if the entire line is a URL (quick check first)
92 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
93 return true;
94 }
95
96 // Only skip if the entire line is an image reference (quick check first)
97 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
98 return true;
99 }
100
101 // Note: link reference definitions are handled as always-exempt (even in strict mode)
102 // in the main check loop, so they don't need to be checked here.
103
104 // Code blocks with long strings (only check if in code block)
105 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
106 && !trimmed.is_empty()
107 && !line.contains(' ')
108 && !line.contains('\t')
109 {
110 return true;
111 }
112
113 false
114 }
115
116 /// Check if rule should skip based on provided config (used for inline config support)
117 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
118 // Skip if content is empty
119 if ctx.content.is_empty() {
120 return true;
121 }
122
123 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
124 if config.reflow
125 && (config.reflow_mode == ReflowMode::SentencePerLine
126 || config.reflow_mode == ReflowMode::SemanticLineBreaks
127 || config.reflow_mode == ReflowMode::Normalize)
128 {
129 return false;
130 }
131
132 // Quick check: if total content is shorter than line limit, definitely skip
133 if ctx.content.len() <= config.line_length.get() {
134 return true;
135 }
136
137 // Skip if no line exceeds the limit
138 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
139 }
140}
141
142impl Rule for MD013LineLength {
/// Returns the rule identifier, `"MD013"`.
fn name(&self) -> &'static str {
    "MD013"
}
146
/// Returns the short human-readable description of the rule.
fn description(&self) -> &'static str {
    "Line length should not be excessive"
}
150
151 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
152 // Use pre-parsed inline config from LintContext
153 let config_override = ctx.inline_config().get_rule_config("MD013");
154
155 // Apply configuration override if present
156 let effective_config = if let Some(json_config) = config_override {
157 if let Some(obj) = json_config.as_object() {
158 let mut config = self.config.clone();
159 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
160 config.line_length = crate::types::LineLength::new(line_length as usize);
161 }
162 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
163 config.code_blocks = code_blocks;
164 }
165 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
166 config.tables = tables;
167 }
168 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
169 config.headings = headings;
170 }
171 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
172 config.strict = strict;
173 }
174 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
175 config.reflow = reflow;
176 }
177 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
178 config.reflow_mode = match reflow_mode {
179 "default" => ReflowMode::Default,
180 "normalize" => ReflowMode::Normalize,
181 "sentence-per-line" => ReflowMode::SentencePerLine,
182 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
183 _ => ReflowMode::default(),
184 };
185 }
186 config
187 } else {
188 self.config.clone()
189 }
190 } else {
191 self.config.clone()
192 };
193
194 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
195 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
196 if self.should_skip_with_config(ctx, &effective_config)
197 && !(effective_config.reflow
198 && (effective_config.reflow_mode == ReflowMode::Normalize
199 || effective_config.reflow_mode == ReflowMode::SentencePerLine
200 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
201 {
202 return Ok(Vec::new());
203 }
204
205 // Direct implementation without DocumentStructure
206 let mut warnings = Vec::new();
207
208 // Special handling: line_length = 0 means "no line length limit"
209 // Skip all line length checks, but still allow reflow if enabled
210 let skip_length_checks = effective_config.line_length.is_unlimited();
211
212 // Pre-filter lines that could be problematic to avoid processing all lines
213 let mut candidate_lines = Vec::new();
214 if !skip_length_checks {
215 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
216 // Skip front matter - it should never be linted
217 if line_info.in_front_matter {
218 continue;
219 }
220
221 // Quick length check first
222 if line_info.byte_len > effective_config.line_length.get() {
223 candidate_lines.push(line_idx);
224 }
225 }
226 }
227
228 // If no candidate lines and not in normalize or sentence-per-line mode, early return
229 if candidate_lines.is_empty()
230 && !(effective_config.reflow
231 && (effective_config.reflow_mode == ReflowMode::Normalize
232 || effective_config.reflow_mode == ReflowMode::SentencePerLine
233 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
234 {
235 return Ok(warnings);
236 }
237
238 let lines = ctx.raw_lines();
239
240 // Create a quick lookup set for heading lines
241 // We need this for both the heading skip check AND the paragraphs check
242 let heading_lines_set: std::collections::HashSet<usize> = ctx
243 .lines
244 .iter()
245 .enumerate()
246 .filter(|(_, line)| line.heading.is_some())
247 .map(|(idx, _)| idx + 1)
248 .collect();
249
250 // Use pre-computed table blocks from context
251 // We need this for both the table skip check AND the paragraphs check
252 let table_blocks = &ctx.table_blocks;
253 let mut table_lines_set = std::collections::HashSet::new();
254 for table in table_blocks {
255 table_lines_set.insert(table.header_line + 1);
256 table_lines_set.insert(table.delimiter_line + 1);
257 for &line in &table.content_lines {
258 table_lines_set.insert(line + 1);
259 }
260 }
261
262 // Process candidate lines for line length checks
263 for &line_idx in &candidate_lines {
264 let line_number = line_idx + 1;
265 let line = lines[line_idx];
266
267 // Calculate actual line length (used in warning messages)
268 let effective_length = self.calculate_effective_length(line);
269
270 // Use single line length limit for all content
271 let line_limit = effective_config.line_length.get();
272
273 // In non-strict mode, forgive the trailing non-whitespace run.
274 // If the line only exceeds the limit because of a long token at the end
275 // (URL, link chain, identifier), it passes. This matches markdownlint's
276 // behavior: line.replace(/\S*$/u, "#")
277 let check_length = if effective_config.strict {
278 effective_length
279 } else {
280 match line.rfind(char::is_whitespace) {
281 Some(pos) => {
282 let ws_char = line[pos..].chars().next().unwrap();
283 let prefix_end = pos + ws_char.len_utf8();
284 self.calculate_string_length(&line[..prefix_end]) + 1
285 }
286 None => 1, // No whitespace — entire line is a single token
287 }
288 };
289
290 // Skip lines where the check length is within the limit
291 if check_length <= line_limit {
292 continue;
293 }
294
295 // Semantic link understanding: suppress when excess comes entirely from inline URLs
296 if !effective_config.strict {
297 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
298 if text_only_length <= line_limit {
299 continue;
300 }
301 }
302
303 // Skip mkdocstrings blocks (already handled by LintContext)
304 if ctx.lines[line_idx].in_mkdocstrings {
305 continue;
306 }
307
308 // Link reference definitions are always exempt, even in strict mode.
309 // There's no way to shorten them without breaking the URL.
310 // Also check after stripping list markers, since list items may
311 // contain link ref defs as their content.
312 {
313 let trimmed = line.trim();
314 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
315 continue;
316 }
317 if is_list_item(trimmed) {
318 let (_, content) = extract_list_marker_and_content(trimmed);
319 let content_trimmed = content.trim();
320 if content_trimmed.starts_with('[')
321 && content_trimmed.contains("]:")
322 && LINK_REF_PATTERN.is_match(content_trimmed)
323 {
324 continue;
325 }
326 }
327 }
328
329 // Skip various block types efficiently
330 if !effective_config.strict {
331 // Lines whose only content is a link/image are exempt.
332 // After stripping list markers, blockquote markers, and emphasis,
333 // if only a link or image remains, there is no way to shorten it.
334 if is_standalone_link_or_image_line(line) {
335 continue;
336 }
337
338 // Skip setext heading underlines
339 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
340 continue;
341 }
342
343 // Skip block elements according to config flags
344 // The flags mean: true = check these elements, false = skip these elements
345 // So we skip when the flag is FALSE and the line is in that element type
346 if (!effective_config.headings && heading_lines_set.contains(&line_number))
347 || (!effective_config.code_blocks
348 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
349 || (!effective_config.tables && table_lines_set.contains(&line_number))
350 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
351 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
352 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
353 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
354 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
355 {
356 continue;
357 }
358
359 // Check if this is a paragraph/regular text line
360 // If paragraphs = false, skip lines that are NOT in special blocks
361 if !effective_config.paragraphs {
362 let is_special_block = heading_lines_set.contains(&line_number)
363 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
364 || table_lines_set.contains(&line_number)
365 || ctx.lines[line_number - 1].blockquote.is_some()
366 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
367 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
368 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
369 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
370 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
371 || ctx
372 .line_info(line_number)
373 .is_some_and(|info| info.in_mkdocs_container());
374
375 // Skip regular paragraph text when paragraphs = false
376 if !is_special_block {
377 continue;
378 }
379 }
380
381 // Skip lines that are only a URL, image ref, or link ref
382 if self.should_ignore_line(line, lines, line_idx, ctx) {
383 continue;
384 }
385 }
386
387 // In sentence-per-line mode, check if this is a single long sentence
388 // If so, emit a warning without a fix (user must manually rephrase)
389 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
390 let sentences = split_into_sentences(line.trim());
391 if sentences.len() == 1 {
392 // Single sentence that's too long - warn but don't auto-fix
393 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
394
395 let (start_line, start_col, end_line, end_col) =
396 calculate_excess_range(line_number, line, line_limit);
397
398 warnings.push(LintWarning {
399 rule_name: Some(self.name().to_string()),
400 message,
401 line: start_line,
402 column: start_col,
403 end_line,
404 end_column: end_col,
405 severity: Severity::Warning,
406 fix: None, // No auto-fix for long single sentences
407 });
408 continue;
409 }
410 // Multiple sentences will be handled by paragraph-based reflow
411 continue;
412 }
413
414 // In semantic-line-breaks mode, skip per-line checks —
415 // all reflow is handled at the paragraph level with cascading splits
416 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
417 continue;
418 }
419
420 // Don't provide fix for individual lines when reflow is enabled
421 // Paragraph-based fixes will be handled separately
422 let fix = None;
423
424 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
425
426 // Calculate precise character range for the excess portion
427 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
428
429 warnings.push(LintWarning {
430 rule_name: Some(self.name().to_string()),
431 message,
432 line: start_line,
433 column: start_col,
434 end_line,
435 end_column: end_col,
436 severity: Severity::Warning,
437 fix,
438 });
439 }
440
441 // If reflow is enabled, generate paragraph-based fixes
442 if effective_config.reflow {
443 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
444 // Merge paragraph warnings with line warnings, removing duplicates
445 for pw in paragraph_warnings {
446 // Remove any line warnings that overlap with this paragraph
447 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
448 warnings.push(pw);
449 }
450 }
451
452 Ok(warnings)
453 }
454
455 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
456 // For CLI usage, apply fixes from warnings
457 // LSP will use the warning-based fixes directly
458 let warnings = self.check(ctx)?;
459
460 // If there are no fixes, return content unchanged
461 if !warnings.iter().any(|w| w.fix.is_some()) {
462 return Ok(ctx.content.to_string());
463 }
464
465 // Apply warning-based fixes
466 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
467 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
468 }
469
/// Exposes the rule as a `&dyn Any` trait object.
fn as_any(&self) -> &dyn std::any::Any {
    self
}
473
/// Returns the rule's category, `RuleCategory::Whitespace`.
fn category(&self) -> RuleCategory {
    RuleCategory::Whitespace
}
477
/// Reports whether the document can be skipped, judged against the rule's own
/// config (`self.config`) via `should_skip_with_config`.
fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
    self.should_skip_with_config(ctx, &self.config)
}
481
482 fn default_config_section(&self) -> Option<(String, toml::Value)> {
483 let default_config = MD013Config::default();
484 let json_value = serde_json::to_value(&default_config).ok()?;
485 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
486
487 if let toml::Value::Table(table) = toml_value {
488 if !table.is_empty() {
489 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
490 } else {
491 None
492 }
493 } else {
494 None
495 }
496 }
497
498 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
499 let mut aliases = std::collections::HashMap::new();
500 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
501 Some(aliases)
502 }
503
504 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
505 where
506 Self: Sized,
507 {
508 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
509 // Use global line_length if rule-specific config still has default value
510 if rule_config.line_length.get() == 80 {
511 rule_config.line_length = config.global.line_length;
512 }
513 Box::new(Self::from_config_struct(rule_config))
514 }
515}
516
517impl MD013LineLength {
518 fn is_blockquote_content_boundary(
519 &self,
520 content: &str,
521 line_num: usize,
522 ctx: &crate::lint_context::LintContext,
523 ) -> bool {
524 let trimmed = content.trim();
525
526 trimmed.is_empty()
527 || ctx.line_info(line_num).is_some_and(|info| {
528 info.in_code_block
529 || info.in_front_matter
530 || info.in_html_block
531 || info.in_html_comment
532 || info.in_esm_block
533 || info.in_jsx_expression
534 || info.in_mdx_comment
535 || info.in_mkdocstrings
536 || info.in_mkdocs_container()
537 || info.is_div_marker
538 })
539 || trimmed.starts_with('#')
540 || trimmed.starts_with("```")
541 || trimmed.starts_with("~~~")
542 || trimmed.starts_with('>')
543 || TableUtils::is_potential_table_row(content)
544 || is_list_item(trimmed)
545 || is_horizontal_rule(trimmed)
546 || (trimmed.starts_with('[') && content.contains("]:"))
547 || is_template_directive_only(content)
548 || is_standalone_attr_list(content)
549 || is_snippet_block_delimiter(content)
550 || is_github_alert_marker(trimmed)
551 }
552
553 fn generate_blockquote_paragraph_fix(
554 &self,
555 ctx: &crate::lint_context::LintContext,
556 config: &MD013Config,
557 lines: &[&str],
558 line_index: &LineIndex,
559 start_idx: usize,
560 line_ending: &str,
561 ) -> (Option<LintWarning>, usize) {
562 let Some(start_bq) = ctx.lines.get(start_idx).and_then(|line| line.blockquote.as_deref()) else {
563 return (None, start_idx + 1);
564 };
565 let target_level = start_bq.nesting_level;
566
567 let mut collected: Vec<CollectedBlockquoteLine> = Vec::new();
568 let mut i = start_idx;
569
570 while i < lines.len() {
571 if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].data.content) {
572 break;
573 }
574
575 let line_num = i + 1;
576 if line_num > ctx.lines.len() {
577 break;
578 }
579
580 if lines[i].trim().is_empty() {
581 break;
582 }
583
584 let line_bq = ctx.lines[i].blockquote.as_deref();
585 if let Some(bq) = line_bq {
586 if bq.nesting_level != target_level {
587 break;
588 }
589
590 if self.is_blockquote_content_boundary(&bq.content, line_num, ctx) {
591 break;
592 }
593
594 collected.push(CollectedBlockquoteLine {
595 line_idx: i,
596 data: BlockquoteLineData::explicit(trim_preserving_hard_break(&bq.content), bq.prefix.clone()),
597 });
598 i += 1;
599 continue;
600 }
601
602 let lazy_content = lines[i].trim_start();
603 if self.is_blockquote_content_boundary(lazy_content, line_num, ctx) {
604 break;
605 }
606
607 collected.push(CollectedBlockquoteLine {
608 line_idx: i,
609 data: BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content)),
610 });
611 i += 1;
612 }
613
614 if collected.is_empty() {
615 return (None, start_idx + 1);
616 }
617
618 let next_idx = i;
619 let paragraph_start = collected[0].line_idx;
620 let end_line = collected[collected.len() - 1].line_idx;
621 let line_data: Vec<BlockquoteLineData> = collected.iter().map(|l| l.data.clone()).collect();
622 let paragraph_text = line_data
623 .iter()
624 .map(|d| d.content.as_str())
625 .collect::<Vec<_>>()
626 .join(" ");
627
628 let contains_definition_list = line_data
629 .iter()
630 .any(|d| crate::utils::is_definition_list_item(&d.content));
631 if contains_definition_list {
632 return (None, next_idx);
633 }
634
635 let contains_snippets = line_data.iter().any(|d| is_snippet_block_delimiter(&d.content));
636 if contains_snippets {
637 return (None, next_idx);
638 }
639
640 let needs_reflow = match config.reflow_mode {
641 ReflowMode::Normalize => line_data.len() > 1,
642 ReflowMode::SentencePerLine => {
643 let sentences = split_into_sentences(¶graph_text);
644 sentences.len() > 1 || line_data.len() > 1
645 }
646 ReflowMode::SemanticLineBreaks => {
647 let sentences = split_into_sentences(¶graph_text);
648 sentences.len() > 1
649 || line_data.len() > 1
650 || collected
651 .iter()
652 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get())
653 }
654 ReflowMode::Default => collected
655 .iter()
656 .any(|l| self.calculate_effective_length(lines[l.line_idx]) > config.line_length.get()),
657 };
658
659 if !needs_reflow {
660 return (None, next_idx);
661 }
662
663 let fallback_prefix = start_bq.prefix.clone();
664 let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
665 let continuation_style = blockquote_continuation_style(&line_data);
666
667 let reflow_line_length = if config.line_length.is_unlimited() {
668 usize::MAX
669 } else {
670 config
671 .line_length
672 .get()
673 .saturating_sub(self.calculate_string_length(&explicit_prefix))
674 .max(1)
675 };
676
677 let reflow_options = crate::utils::text_reflow::ReflowOptions {
678 line_length: reflow_line_length,
679 break_on_sentences: true,
680 preserve_breaks: false,
681 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
682 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
683 abbreviations: config.abbreviations_for_reflow(),
684 length_mode: self.reflow_length_mode(),
685 };
686
687 let reflowed_with_style =
688 reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &reflow_options);
689
690 if reflowed_with_style.is_empty() {
691 return (None, next_idx);
692 }
693
694 let reflowed_text = reflowed_with_style.join(line_ending);
695
696 let start_range = line_index.whole_line_range(paragraph_start + 1);
697 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
698 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
699 } else {
700 line_index.whole_line_range(end_line + 1)
701 };
702 let byte_range = start_range.start..end_range.end;
703
704 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
705 format!("{reflowed_text}{line_ending}")
706 } else {
707 reflowed_text
708 };
709
710 let original_text = &ctx.content[byte_range.clone()];
711 if original_text == replacement {
712 return (None, next_idx);
713 }
714
715 let (warning_line, warning_end_line) = match config.reflow_mode {
716 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
717 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => (paragraph_start + 1, end_line + 1),
718 ReflowMode::Default => {
719 let violating_line = collected
720 .iter()
721 .find(|line| self.calculate_effective_length(lines[line.line_idx]) > config.line_length.get())
722 .map(|line| line.line_idx + 1)
723 .unwrap_or(paragraph_start + 1);
724 (violating_line, violating_line)
725 }
726 };
727
728 let warning = LintWarning {
729 rule_name: Some(self.name().to_string()),
730 message: match config.reflow_mode {
731 ReflowMode::Normalize => format!(
732 "Paragraph could be normalized to use line length of {} characters",
733 config.line_length.get()
734 ),
735 ReflowMode::SentencePerLine => {
736 let num_sentences = split_into_sentences(¶graph_text).len();
737 if line_data.len() == 1 {
738 format!("Line contains {num_sentences} sentences (one sentence per line required)")
739 } else {
740 let num_lines = line_data.len();
741 format!(
742 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
743 )
744 }
745 }
746 ReflowMode::SemanticLineBreaks => {
747 let num_sentences = split_into_sentences(¶graph_text).len();
748 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
749 }
750 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
751 },
752 line: warning_line,
753 column: 1,
754 end_line: warning_end_line,
755 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
756 severity: Severity::Warning,
757 fix: Some(crate::rule::Fix {
758 range: byte_range,
759 replacement,
760 }),
761 };
762
763 (Some(warning), next_idx)
764 }
765
766 /// Generate paragraph-based fixes
767 fn generate_paragraph_fixes(
768 &self,
769 ctx: &crate::lint_context::LintContext,
770 config: &MD013Config,
771 lines: &[&str],
772 ) -> Vec<LintWarning> {
773 let mut warnings = Vec::new();
774 let line_index = LineIndex::new(ctx.content);
775
776 // Detect the content's line ending style to preserve it in replacements.
777 // The LSP receives content from editors which may use CRLF (Windows).
778 // Replacements must match the original line endings to avoid false positives.
779 let line_ending = crate::utils::line_ending::detect_line_ending(ctx.content);
780
781 let mut i = 0;
782 while i < lines.len() {
783 let line_num = i + 1;
784
785 // Handle blockquote paragraphs with style-preserving reflow.
786 if line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some() {
787 let (warning, next_idx) =
788 self.generate_blockquote_paragraph_fix(ctx, config, lines, &line_index, i, line_ending);
789 if let Some(warning) = warning {
790 warnings.push(warning);
791 }
792 i = next_idx;
793 continue;
794 }
795
796 // Skip special structures (but NOT MkDocs containers - those get special handling)
797 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
798 info.in_code_block
799 || info.in_front_matter
800 || info.in_html_block
801 || info.in_html_comment
802 || info.in_esm_block
803 || info.in_jsx_expression
804 || info.in_mdx_comment
805 || info.in_mkdocstrings
806 });
807
808 if should_skip_due_to_line_info
809 || lines[i].trim().starts_with('#')
810 || TableUtils::is_potential_table_row(lines[i])
811 || lines[i].trim().is_empty()
812 || is_horizontal_rule(lines[i].trim())
813 || is_template_directive_only(lines[i])
814 || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
815 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
816 {
817 i += 1;
818 continue;
819 }
820
821 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
822 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
823 // Skip admonition/tab marker lines — only reflow their indented content
824 let current_line = lines[i];
825 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
826 i += 1;
827 continue;
828 }
829
830 let container_start = i;
831
832 // Detect the actual indent level from the first content line
833 // (supports nested admonitions with 8+ spaces)
834 let first_line = lines[i];
835 let base_indent_len = first_line.len() - first_line.trim_start().len();
836 let base_indent: String = " ".repeat(base_indent_len);
837
838 // Collect consecutive MkDocs container paragraph lines
839 let mut container_lines: Vec<&str> = Vec::new();
840 while i < lines.len() {
841 let current_line_num = i + 1;
842 let line_info = ctx.line_info(current_line_num);
843
844 // Stop if we leave the MkDocs container
845 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
846 break;
847 }
848
849 let line = lines[i];
850
851 // Stop at paragraph boundaries within the container
852 if line.trim().is_empty() {
853 break;
854 }
855
856 // Skip list items, code blocks, headings within containers
857 if is_list_item(line.trim())
858 || line.trim().starts_with("```")
859 || line.trim().starts_with("~~~")
860 || line.trim().starts_with('#')
861 {
862 break;
863 }
864
865 container_lines.push(line);
866 i += 1;
867 }
868
869 if container_lines.is_empty() {
870 // Must advance i to avoid infinite loop when we encounter
871 // non-paragraph content (code block, list, heading, empty line)
872 // at the start of an MkDocs container
873 i += 1;
874 continue;
875 }
876
877 // Strip the base indent from each line and join for reflow
878 let stripped_lines: Vec<&str> = container_lines
879 .iter()
880 .map(|line| {
881 if line.starts_with(&base_indent) {
882 &line[base_indent_len..]
883 } else {
884 line.trim_start()
885 }
886 })
887 .collect();
888 let paragraph_text = stripped_lines.join(" ");
889
890 // Check if reflow is needed
891 let needs_reflow = match config.reflow_mode {
892 ReflowMode::Normalize => container_lines.len() > 1,
893 ReflowMode::SentencePerLine => {
894 let sentences = split_into_sentences(¶graph_text);
895 sentences.len() > 1 || container_lines.len() > 1
896 }
897 ReflowMode::SemanticLineBreaks => {
898 let sentences = split_into_sentences(¶graph_text);
899 sentences.len() > 1
900 || container_lines.len() > 1
901 || container_lines
902 .iter()
903 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
904 }
905 ReflowMode::Default => container_lines
906 .iter()
907 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
908 };
909
910 if !needs_reflow {
911 continue;
912 }
913
914 // Calculate byte range for this container paragraph
915 let start_range = line_index.whole_line_range(container_start + 1);
916 let end_line = container_start + container_lines.len() - 1;
917 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
918 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
919 } else {
920 line_index.whole_line_range(end_line + 1)
921 };
922 let byte_range = start_range.start..end_range.end;
923
924 // Reflow with adjusted line length (accounting for the 4-space indent)
925 let reflow_line_length = if config.line_length.is_unlimited() {
926 usize::MAX
927 } else {
928 config.line_length.get().saturating_sub(base_indent_len).max(1)
929 };
930 let reflow_options = crate::utils::text_reflow::ReflowOptions {
931 line_length: reflow_line_length,
932 break_on_sentences: true,
933 preserve_breaks: false,
934 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
935 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
936 abbreviations: config.abbreviations_for_reflow(),
937 length_mode: self.reflow_length_mode(),
938 };
939 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
940
941 // Re-add the 4-space indent to each reflowed line
942 let reflowed_with_indent: Vec<String> =
943 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
944 let reflowed_text = reflowed_with_indent.join(line_ending);
945
946 // Preserve trailing newline
947 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
948 format!("{reflowed_text}{line_ending}")
949 } else {
950 reflowed_text
951 };
952
953 // Only generate a warning if the replacement is different
954 let original_text = &ctx.content[byte_range.clone()];
955 if original_text != replacement {
956 warnings.push(LintWarning {
957 rule_name: Some(self.name().to_string()),
958 message: format!(
959 "Line length {} exceeds {} characters (in MkDocs container)",
960 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
961 config.line_length.get()
962 ),
963 line: container_start + 1,
964 column: 1,
965 end_line: end_line + 1,
966 end_column: lines[end_line].len() + 1,
967 severity: Severity::Warning,
968 fix: Some(crate::rule::Fix {
969 range: byte_range,
970 replacement,
971 }),
972 });
973 }
974 continue;
975 }
976
/// Returns `true` when `content`, ignoring leading whitespace, begins with one of the
/// upper-case callout prefixes (`NOTE:`, `WARNING:`, ...). The match is case-sensitive
/// and only looks at the start of the line.
fn is_semantic_line(content: &str) -> bool {
    const SEMANTIC_MARKERS: [&str; 8] = [
        "NOTE:",
        "WARNING:",
        "IMPORTANT:",
        "CAUTION:",
        "TIP:",
        "DANGER:",
        "HINT:",
        "INFO:",
    ];

    let body = content.trim_start();
    SEMANTIC_MARKERS.iter().any(|prefix| body.starts_with(*prefix))
}
992
/// Returns `true` when `content`, ignoring leading whitespace, begins with a code-fence
/// delimiter (three backticks or three tildes). Opening and closing fences are not
/// distinguished.
fn is_fence_marker(content: &str) -> bool {
    let body = content.trim_start();
    ["```", "~~~"].iter().any(|fence| body.starts_with(*fence))
}
998
999 // Check if this is a list item - handle it specially
1000 let trimmed = lines[i].trim();
1001 if is_list_item(trimmed) {
1002 // Collect the entire list item including continuation lines
1003 let list_start = i;
1004 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
1005 let marker_len = marker.len();
1006
1007 // MkDocs flavor requires at least 4 spaces for list continuation
1008 let min_continuation_indent = if ctx.flavor.requires_strict_list_indent() {
1009 marker_len.max(4)
1010 } else {
1011 marker_len
1012 };
1013
1014 // Track lines and their types (content, code block, fence, nested list)
#[derive(Clone)]
enum LineType {
    // Plain text of the item: the first line after the list marker, or a
    // continuation line with its indentation stripped.
    Content(String),
    // Line kept as code: a line flagged as inside a code block, a fence marker,
    // or a continuation indented deeply enough to count as indented code.
    CodeBlock(String, usize), // content and original indent
    // Nested list item that is collected as part of its parent item.
    NestedListItem(String, usize), // full line content and original indent
    SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
    SnippetLine(String),  // MkDocs Snippets delimiters (-8<-) that must stay on their own line
    DivMarker(String),    // Quarto/Pandoc div markers (::: opening or closing)
    AdmonitionHeader(String, usize), // header text (e.g. "!!! note") and original indent
    AdmonitionContent(String, usize), // body content text and original indent
    // Blank line that is followed by a sufficiently indented line, i.e. a
    // separator between blocks inside the same list item.
    Empty,
}
1027
1028 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
1029 i += 1;
1030
1031 // Collect continuation lines using ctx.lines for metadata
1032 while i < lines.len() {
1033 let line_info = &ctx.lines[i];
1034
1035 // Use pre-computed is_blank from ctx
1036 if line_info.is_blank {
1037 // Empty line - check if next line is indented (part of list item)
1038 if i + 1 < lines.len() {
1039 let next_info = &ctx.lines[i + 1];
1040
1041 // Check if next line is indented enough to be continuation
1042 if !next_info.is_blank && next_info.indent >= min_continuation_indent {
1043 // This blank line is between paragraphs/blocks in the list item
1044 list_item_lines.push(LineType::Empty);
1045 i += 1;
1046 continue;
1047 }
1048 }
1049 // No indented line after blank, end of list item
1050 break;
1051 }
1052
1053 // Use pre-computed indent from ctx
1054 let indent = line_info.indent;
1055
1056 // Valid continuation must be indented at least min_continuation_indent
1057 if indent >= min_continuation_indent {
1058 let trimmed = line_info.content(ctx.content).trim();
1059
1060 // Use pre-computed in_code_block from ctx
1061 if line_info.in_code_block {
1062 list_item_lines.push(LineType::CodeBlock(
1063 line_info.content(ctx.content)[indent..].to_string(),
1064 indent,
1065 ));
1066 i += 1;
1067 continue;
1068 }
1069
1070 // Check for MkDocs admonition lines inside list items.
1071 // The flavor detection marks these with in_admonition, so we
1072 // can classify them as admonition header or body content.
1073 if line_info.in_admonition {
1074 let raw_content = line_info.content(ctx.content);
1075 if mkdocs_admonitions::is_admonition_start(raw_content) {
1076 let header_text = raw_content[indent..].trim_end().to_string();
1077 list_item_lines.push(LineType::AdmonitionHeader(header_text, indent));
1078 } else {
1079 let body_text = raw_content[indent..].trim_end().to_string();
1080 list_item_lines.push(LineType::AdmonitionContent(body_text, indent));
1081 }
1082 i += 1;
1083 continue;
1084 }
1085
1086 // Check if this is a SIBLING list item (breaks parent)
1087 // Nested lists are indented >= marker_len and are PART of the parent item
1088 // Siblings are at indent < marker_len (at or before parent marker)
1089 if is_list_item(trimmed) && indent < marker_len {
1090 // This is a sibling item at same or higher level - end parent item
1091 break;
1092 }
1093
1094 // Check if this is a NESTED list item marker
1095 // Nested lists should be processed separately UNLESS they're part of a
1096 // multi-paragraph list item (indicated by a blank line before them OR
1097 // it's a continuation of an already-started nested list)
1098 if is_list_item(trimmed) && indent >= marker_len {
1099 // Check if there was a blank line before this (multi-paragraph context)
1100 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
1101
1102 // Check if we've already seen nested list content (another nested item)
1103 let has_nested_content = list_item_lines.iter().any(|line| {
1104 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
1105 || matches!(line, LineType::NestedListItem(_, _))
1106 });
1107
1108 if !has_blank_before && !has_nested_content {
1109 // Single-paragraph context with no prior nested items: starts a new item
1110 // End parent collection; nested list will be processed next
1111 break;
1112 }
1113 // else: multi-paragraph context or continuation of nested list, keep collecting
1114 // Mark this as a nested list item to preserve its structure
1115 list_item_lines.push(LineType::NestedListItem(
1116 line_info.content(ctx.content)[indent..].to_string(),
1117 indent,
1118 ));
1119 i += 1;
1120 continue;
1121 }
1122
1123 // Normal continuation vs indented code block
1124 if indent <= min_continuation_indent + 3 {
1125 // Extract content (remove indentation and trailing whitespace)
1126 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
1127 // See: https://github.com/rvben/rumdl/issues/76
1128 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
1129
1130 // Check if this is a div marker (::: opening or closing)
1131 // These must be preserved on their own line, not merged into paragraphs
1132 if line_info.is_div_marker {
1133 list_item_lines.push(LineType::DivMarker(content));
1134 }
1135 // Check if this is a fence marker (opening or closing)
1136 // These should be treated as code block lines, not paragraph content
1137 else if is_fence_marker(&content) {
1138 list_item_lines.push(LineType::CodeBlock(content, indent));
1139 }
1140 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
1141 else if is_semantic_line(&content) {
1142 list_item_lines.push(LineType::SemanticLine(content));
1143 }
1144 // Check if this is a snippet block delimiter (-8<- or --8<--)
1145 // These must be preserved on their own lines for MkDocs Snippets extension
1146 else if is_snippet_block_delimiter(&content) {
1147 list_item_lines.push(LineType::SnippetLine(content));
1148 } else {
1149 list_item_lines.push(LineType::Content(content));
1150 }
1151 i += 1;
1152 } else {
1153 // indent >= min_continuation_indent + 4: indented code block
1154 list_item_lines.push(LineType::CodeBlock(
1155 line_info.content(ctx.content)[indent..].to_string(),
1156 indent,
1157 ));
1158 i += 1;
1159 }
1160 } else {
1161 // Not indented enough, end of list item
1162 break;
1163 }
1164 }
1165
1166 let indent_size = min_continuation_indent;
1167 let expected_indent = " ".repeat(indent_size);
1168
1169 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
// Logical blocks that the collected list-item lines are grouped into.
#[derive(Clone)]
enum Block {
    // Consecutive plain content lines, ended by a blank line or by any other block kind.
    Paragraph(Vec<String>),
    // Consecutive code lines (fenced or indented).
    Code {
        lines: Vec<(String, usize)>, // (content, indent) pairs
        has_preceding_blank: bool,   // Whether there was a blank line before this block
    },
    NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
    SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
    SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
    DivMarker(String), // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
    // Lines from a block-level HTML opening tag (or comment) through its closing line.
    Html {
        lines: Vec<String>,        // HTML content preserved exactly as-is
        has_preceding_blank: bool, // Whether there was a blank line before this block
    },
    // An admonition header together with the body lines collected after it;
    // blank body lines are stored as an empty string with indent 0.
    Admonition {
        header: String,                      // e.g. "!!! note" or "??? warning \"Title\""
        header_indent: usize,                // original indent of the header line
        content_lines: Vec<(String, usize)>, // (text, original_indent) pairs for body lines
    },
}
1191
// HTML tag detection helpers

/// Lower-case names of the HTML elements that start an HTML block when one of
/// them opens a line.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];

/// Identifies a line that opens an HTML block.
///
/// Returns:
/// - `Some("!--")` when the line starts an HTML comment,
/// - `Some(name)` (lower-case) when the line starts with an opening tag whose name is
///   listed in `BLOCK_LEVEL_TAGS` (compared ASCII case-insensitively),
/// - `None` for closing tags, `<!...>` declarations, inline/unknown tags and plain text.
///
/// The tag name ends at whitespace, `>`, `/`, or the end of the line, so an opening tag
/// whose attributes continue on the following lines (a line consisting of just `<div`)
/// is recognised as well.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let trimmed = line.trim();

    // Comments are tracked with the pseudo tag name "!--".
    if trimmed.starts_with("<!--") {
        return Some("!--".to_string());
    }

    // Closing tags and declarations never open a block.
    if trimmed.starts_with("</") || trimmed.starts_with("<!") {
        return None;
    }

    let after_bracket = trimmed.strip_prefix('<')?;
    // `trimmed` has no trailing whitespace, so running to the end of the string
    // means the tag name is the last thing on the line.
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let name = &after_bracket[..name_end];

    // Only known block-level tags start an HTML block; return the canonical
    // lower-case spelling from the table.
    BLOCK_LEVEL_TAGS
        .iter()
        .find(|tag| tag.eq_ignore_ascii_case(name))
        .map(|tag| (*tag).to_string())
}
1261
/// Reports whether `line` closes the element named `tag_name`.
///
/// `tag_name` is expected in the form produced by `is_block_html_opening_tag`: a
/// lower-case tag name, or the pseudo name `"!--"` for an HTML comment.
///
/// - For `"!--"` the line closes the comment when it ends with `-->`.
/// - Otherwise the line must start with `</`, optionally followed by whitespace, then
///   the complete tag name (ASCII case-insensitive) terminated by whitespace, `>`, or
///   the end of the line. Requiring the whole name prevents `</pre>` from closing `p`
///   or `</thead>` from closing `th`.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    let rest = match trimmed.strip_prefix("</") {
        Some(rest) => rest.trim_start(),
        None => return false,
    };

    // Isolate the full closing tag name before comparing, instead of prefix matching.
    let name_end = rest
        .find(|c: char| c.is_whitespace() || c == '>')
        .unwrap_or(rest.len());
    rest[..name_end].eq_ignore_ascii_case(tag_name)
}
1275
/// Reports whether the HTML construct opened on `line` is already complete on that
/// same line, so that no later closing line has to be awaited.
///
/// Callers use the result to decide whether an opening tag must be pushed on the
/// open-tag stack. A line counts as complete when it
/// - ends with `/>` (XML-style self-closing syntax),
/// - is a comment that both starts with `<!--` and ends with `-->`,
/// - opens the void element `hr`, which has no end tag, or
/// - ends with the closing tag matching its own opening tag, e.g.
///   `<summary>Title</summary>` (tag names compared ASCII case-insensitively).
///
/// Without the last three cases such lines would open an HTML block that no later
/// line can close.
fn is_self_closing_tag(line: &str) -> bool {
    let trimmed = line.trim();

    if trimmed.ends_with("/>") {
        return true;
    }

    // A comment is complete only if its terminator is on this line too.
    if trimmed.starts_with("<!--") {
        return trimmed.ends_with("-->");
    }

    let rest = match trimmed.strip_prefix('<') {
        Some(rest) => rest,
        None => return false,
    };
    let name_end = rest
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(rest.len());
    let name = &rest[..name_end];
    if name.is_empty() {
        return false;
    }

    // `<hr>` never has an end tag.
    if name.eq_ignore_ascii_case("hr") {
        return true;
    }

    // One-line element: the line ends with `</name>`.
    trimmed
        .strip_suffix('>')
        .and_then(|body| {
            let split = body.len().checked_sub(name.len())?;
            // `get` returns None when `split` is not on a char boundary.
            let head = body.get(..split)?;
            let tail = body.get(split..)?;
            Some(head.ends_with("</") && tail.eq_ignore_ascii_case(name))
        })
        .unwrap_or(false)
}
1280
1281 let mut blocks: Vec<Block> = Vec::new();
1282 let mut current_paragraph: Vec<String> = Vec::new();
1283 let mut current_code_block: Vec<(String, usize)> = Vec::new();
1284 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
1285 let mut current_html_block: Vec<String> = Vec::new();
1286 let mut html_tag_stack: Vec<String> = Vec::new();
1287 let mut in_code = false;
1288 let mut in_nested_list = false;
1289 let mut in_html_block = false;
1290 let mut had_preceding_blank = false; // Track if we just saw an empty line
1291 let mut code_block_has_preceding_blank = false; // Track blank before current code block
1292 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
1293
1294 // Track admonition context for block building
1295 let mut in_admonition_block = false;
1296 let mut admonition_header: Option<(String, usize)> = None; // (header_text, indent)
1297 let mut admonition_content: Vec<(String, usize)> = Vec::new();
1298
1299 // Flush any pending admonition block into `blocks`
1300 let flush_admonition = |blocks: &mut Vec<Block>,
1301 in_admonition: &mut bool,
1302 header: &mut Option<(String, usize)>,
1303 content: &mut Vec<(String, usize)>| {
1304 if *in_admonition {
1305 if let Some((h, hi)) = header.take() {
1306 blocks.push(Block::Admonition {
1307 header: h,
1308 header_indent: hi,
1309 content_lines: std::mem::take(content),
1310 });
1311 }
1312 *in_admonition = false;
1313 }
1314 };
1315
1316 for line in &list_item_lines {
1317 match line {
1318 LineType::Empty => {
1319 if in_admonition_block {
1320 // Blank lines inside admonitions separate paragraphs within the body
1321 admonition_content.push((String::new(), 0));
1322 } else if in_code {
1323 current_code_block.push((String::new(), 0));
1324 } else if in_nested_list {
1325 current_nested_list.push((String::new(), 0));
1326 } else if in_html_block {
1327 // Allow blank lines inside HTML blocks
1328 current_html_block.push(String::new());
1329 } else if !current_paragraph.is_empty() {
1330 blocks.push(Block::Paragraph(current_paragraph.clone()));
1331 current_paragraph.clear();
1332 }
1333 // Mark that we saw a blank line
1334 had_preceding_blank = true;
1335 }
1336 LineType::Content(content) => {
1337 flush_admonition(
1338 &mut blocks,
1339 &mut in_admonition_block,
1340 &mut admonition_header,
1341 &mut admonition_content,
1342 );
1343 // Check if we're currently in an HTML block
1344 if in_html_block {
1345 current_html_block.push(content.clone());
1346
1347 // Check if this line closes any open HTML tags
1348 if let Some(last_tag) = html_tag_stack.last() {
1349 if is_html_closing_tag(content, last_tag) {
1350 html_tag_stack.pop();
1351
1352 // If stack is empty, HTML block is complete
1353 if html_tag_stack.is_empty() {
1354 blocks.push(Block::Html {
1355 lines: current_html_block.clone(),
1356 has_preceding_blank: html_block_has_preceding_blank,
1357 });
1358 current_html_block.clear();
1359 in_html_block = false;
1360 }
1361 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1362 // Nested opening tag within HTML block
1363 if !is_self_closing_tag(content) {
1364 html_tag_stack.push(new_tag);
1365 }
1366 }
1367 }
1368 had_preceding_blank = false;
1369 } else {
1370 // Not in HTML block - check if this line starts one
1371 if let Some(tag_name) = is_block_html_opening_tag(content) {
1372 // Flush current paragraph before starting HTML block
1373 if in_code {
1374 blocks.push(Block::Code {
1375 lines: current_code_block.clone(),
1376 has_preceding_blank: code_block_has_preceding_blank,
1377 });
1378 current_code_block.clear();
1379 in_code = false;
1380 } else if in_nested_list {
1381 blocks.push(Block::NestedList(current_nested_list.clone()));
1382 current_nested_list.clear();
1383 in_nested_list = false;
1384 } else if !current_paragraph.is_empty() {
1385 blocks.push(Block::Paragraph(current_paragraph.clone()));
1386 current_paragraph.clear();
1387 }
1388
1389 // Start new HTML block
1390 in_html_block = true;
1391 html_block_has_preceding_blank = had_preceding_blank;
1392 current_html_block.push(content.clone());
1393
1394 // Check if it's self-closing or needs a closing tag
1395 if is_self_closing_tag(content) {
1396 // Self-closing tag - complete the HTML block immediately
1397 blocks.push(Block::Html {
1398 lines: current_html_block.clone(),
1399 has_preceding_blank: html_block_has_preceding_blank,
1400 });
1401 current_html_block.clear();
1402 in_html_block = false;
1403 } else {
1404 // Regular opening tag - push to stack
1405 html_tag_stack.push(tag_name);
1406 }
1407 } else {
1408 // Regular content line - add to paragraph
1409 if in_code {
1410 // Switching from code to content
1411 blocks.push(Block::Code {
1412 lines: current_code_block.clone(),
1413 has_preceding_blank: code_block_has_preceding_blank,
1414 });
1415 current_code_block.clear();
1416 in_code = false;
1417 } else if in_nested_list {
1418 // Switching from nested list to content
1419 blocks.push(Block::NestedList(current_nested_list.clone()));
1420 current_nested_list.clear();
1421 in_nested_list = false;
1422 }
1423 current_paragraph.push(content.clone());
1424 }
1425 had_preceding_blank = false; // Reset after content
1426 }
1427 }
1428 LineType::CodeBlock(content, indent) => {
1429 flush_admonition(
1430 &mut blocks,
1431 &mut in_admonition_block,
1432 &mut admonition_header,
1433 &mut admonition_content,
1434 );
1435 if in_nested_list {
1436 // Switching from nested list to code
1437 blocks.push(Block::NestedList(current_nested_list.clone()));
1438 current_nested_list.clear();
1439 in_nested_list = false;
1440 } else if in_html_block {
1441 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1442 blocks.push(Block::Html {
1443 lines: current_html_block.clone(),
1444 has_preceding_blank: html_block_has_preceding_blank,
1445 });
1446 current_html_block.clear();
1447 html_tag_stack.clear();
1448 in_html_block = false;
1449 }
1450 if !in_code {
1451 // Switching from content to code
1452 if !current_paragraph.is_empty() {
1453 blocks.push(Block::Paragraph(current_paragraph.clone()));
1454 current_paragraph.clear();
1455 }
1456 in_code = true;
1457 // Record whether there was a blank line before this code block
1458 code_block_has_preceding_blank = had_preceding_blank;
1459 }
1460 current_code_block.push((content.clone(), *indent));
1461 had_preceding_blank = false; // Reset after code
1462 }
1463 LineType::NestedListItem(content, indent) => {
1464 flush_admonition(
1465 &mut blocks,
1466 &mut in_admonition_block,
1467 &mut admonition_header,
1468 &mut admonition_content,
1469 );
1470 if in_code {
1471 // Switching from code to nested list
1472 blocks.push(Block::Code {
1473 lines: current_code_block.clone(),
1474 has_preceding_blank: code_block_has_preceding_blank,
1475 });
1476 current_code_block.clear();
1477 in_code = false;
1478 } else if in_html_block {
1479 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1480 blocks.push(Block::Html {
1481 lines: current_html_block.clone(),
1482 has_preceding_blank: html_block_has_preceding_blank,
1483 });
1484 current_html_block.clear();
1485 html_tag_stack.clear();
1486 in_html_block = false;
1487 }
1488 if !in_nested_list {
1489 // Switching from content to nested list
1490 if !current_paragraph.is_empty() {
1491 blocks.push(Block::Paragraph(current_paragraph.clone()));
1492 current_paragraph.clear();
1493 }
1494 in_nested_list = true;
1495 }
1496 current_nested_list.push((content.clone(), *indent));
1497 had_preceding_blank = false; // Reset after nested list
1498 }
1499 LineType::SemanticLine(content) => {
1500 // Semantic lines are standalone - flush any current block and add as separate block
1501 flush_admonition(
1502 &mut blocks,
1503 &mut in_admonition_block,
1504 &mut admonition_header,
1505 &mut admonition_content,
1506 );
1507 if in_code {
1508 blocks.push(Block::Code {
1509 lines: current_code_block.clone(),
1510 has_preceding_blank: code_block_has_preceding_blank,
1511 });
1512 current_code_block.clear();
1513 in_code = false;
1514 } else if in_nested_list {
1515 blocks.push(Block::NestedList(current_nested_list.clone()));
1516 current_nested_list.clear();
1517 in_nested_list = false;
1518 } else if in_html_block {
1519 blocks.push(Block::Html {
1520 lines: current_html_block.clone(),
1521 has_preceding_blank: html_block_has_preceding_blank,
1522 });
1523 current_html_block.clear();
1524 html_tag_stack.clear();
1525 in_html_block = false;
1526 } else if !current_paragraph.is_empty() {
1527 blocks.push(Block::Paragraph(current_paragraph.clone()));
1528 current_paragraph.clear();
1529 }
1530 // Add semantic line as its own block
1531 blocks.push(Block::SemanticLine(content.clone()));
1532 had_preceding_blank = false; // Reset after semantic line
1533 }
1534 LineType::SnippetLine(content) => {
1535 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1536 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1537 flush_admonition(
1538 &mut blocks,
1539 &mut in_admonition_block,
1540 &mut admonition_header,
1541 &mut admonition_content,
1542 );
1543 if in_code {
1544 blocks.push(Block::Code {
1545 lines: current_code_block.clone(),
1546 has_preceding_blank: code_block_has_preceding_blank,
1547 });
1548 current_code_block.clear();
1549 in_code = false;
1550 } else if in_nested_list {
1551 blocks.push(Block::NestedList(current_nested_list.clone()));
1552 current_nested_list.clear();
1553 in_nested_list = false;
1554 } else if in_html_block {
1555 blocks.push(Block::Html {
1556 lines: current_html_block.clone(),
1557 has_preceding_blank: html_block_has_preceding_blank,
1558 });
1559 current_html_block.clear();
1560 html_tag_stack.clear();
1561 in_html_block = false;
1562 } else if !current_paragraph.is_empty() {
1563 blocks.push(Block::Paragraph(current_paragraph.clone()));
1564 current_paragraph.clear();
1565 }
1566 // Add snippet line as its own block
1567 blocks.push(Block::SnippetLine(content.clone()));
1568 had_preceding_blank = false;
1569 }
1570 LineType::DivMarker(content) => {
1571 // Div markers (::: opening or closing) are standalone structural delimiters
1572 // Flush any current block and add as separate block
1573 flush_admonition(
1574 &mut blocks,
1575 &mut in_admonition_block,
1576 &mut admonition_header,
1577 &mut admonition_content,
1578 );
1579 if in_code {
1580 blocks.push(Block::Code {
1581 lines: current_code_block.clone(),
1582 has_preceding_blank: code_block_has_preceding_blank,
1583 });
1584 current_code_block.clear();
1585 in_code = false;
1586 } else if in_nested_list {
1587 blocks.push(Block::NestedList(current_nested_list.clone()));
1588 current_nested_list.clear();
1589 in_nested_list = false;
1590 } else if in_html_block {
1591 blocks.push(Block::Html {
1592 lines: current_html_block.clone(),
1593 has_preceding_blank: html_block_has_preceding_blank,
1594 });
1595 current_html_block.clear();
1596 html_tag_stack.clear();
1597 in_html_block = false;
1598 } else if !current_paragraph.is_empty() {
1599 blocks.push(Block::Paragraph(current_paragraph.clone()));
1600 current_paragraph.clear();
1601 }
1602 blocks.push(Block::DivMarker(content.clone()));
1603 had_preceding_blank = false;
1604 }
1605 LineType::AdmonitionHeader(header_text, indent) => {
1606 flush_admonition(
1607 &mut blocks,
1608 &mut in_admonition_block,
1609 &mut admonition_header,
1610 &mut admonition_content,
1611 );
1612 // Flush other current blocks
1613 if in_code {
1614 blocks.push(Block::Code {
1615 lines: current_code_block.clone(),
1616 has_preceding_blank: code_block_has_preceding_blank,
1617 });
1618 current_code_block.clear();
1619 in_code = false;
1620 } else if in_nested_list {
1621 blocks.push(Block::NestedList(current_nested_list.clone()));
1622 current_nested_list.clear();
1623 in_nested_list = false;
1624 } else if in_html_block {
1625 blocks.push(Block::Html {
1626 lines: current_html_block.clone(),
1627 has_preceding_blank: html_block_has_preceding_blank,
1628 });
1629 current_html_block.clear();
1630 html_tag_stack.clear();
1631 in_html_block = false;
1632 } else if !current_paragraph.is_empty() {
1633 blocks.push(Block::Paragraph(current_paragraph.clone()));
1634 current_paragraph.clear();
1635 }
1636 // Start new admonition block
1637 in_admonition_block = true;
1638 admonition_header = Some((header_text.clone(), *indent));
1639 admonition_content.clear();
1640 had_preceding_blank = false;
1641 }
1642 LineType::AdmonitionContent(content, indent) => {
1643 if in_admonition_block {
1644 // Add to current admonition body
1645 admonition_content.push((content.clone(), *indent));
1646 } else {
1647 // Admonition content without a header should not happen,
1648 // but treat it as regular content to avoid data loss
1649 current_paragraph.push(content.clone());
1650 }
1651 had_preceding_blank = false;
1652 }
1653 }
1654 }
1655
1656 // Push all remaining pending blocks independently
1657 flush_admonition(
1658 &mut blocks,
1659 &mut in_admonition_block,
1660 &mut admonition_header,
1661 &mut admonition_content,
1662 );
1663 if in_code && !current_code_block.is_empty() {
1664 blocks.push(Block::Code {
1665 lines: current_code_block,
1666 has_preceding_blank: code_block_has_preceding_blank,
1667 });
1668 }
1669 if in_nested_list && !current_nested_list.is_empty() {
1670 blocks.push(Block::NestedList(current_nested_list));
1671 }
1672 if in_html_block && !current_html_block.is_empty() {
1673 blocks.push(Block::Html {
1674 lines: current_html_block,
1675 has_preceding_blank: html_block_has_preceding_blank,
1676 });
1677 }
1678 if !current_paragraph.is_empty() {
1679 blocks.push(Block::Paragraph(current_paragraph));
1680 }
1681
1682 // Helper: check if a line (raw source or stripped content) is exempt
1683 // from line-length checks. Link reference definitions are always exempt;
1684 // standalone link/image lines are exempt when strict mode is off.
1685 // Also checks content after stripping list markers, since list item
1686 // continuation lines may contain link ref defs.
1687 let is_exempt_line = |raw_line: &str| -> bool {
1688 let trimmed = raw_line.trim();
1689 // Link reference definitions: always exempt
1690 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
1691 return true;
1692 }
1693 // Also check after stripping list markers (for list item content)
1694 if is_list_item(trimmed) {
1695 let (_, content) = extract_list_marker_and_content(trimmed);
1696 let content_trimmed = content.trim();
1697 if content_trimmed.starts_with('[')
1698 && content_trimmed.contains("]:")
1699 && LINK_REF_PATTERN.is_match(content_trimmed)
1700 {
1701 return true;
1702 }
1703 }
1704 // Standalone link/image lines: exempt when not strict
1705 if !config.strict && is_standalone_link_or_image_line(raw_line) {
1706 return true;
1707 }
1708 false
1709 };
1710
1711 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1712 // Exclude link reference definitions and standalone link lines from content
1713 // so they don't pollute combined_content or trigger false reflow.
1714 let content_lines: Vec<String> = list_item_lines
1715 .iter()
1716 .filter_map(|line| {
1717 if let LineType::Content(s) = line {
1718 if is_exempt_line(s) {
1719 return None;
1720 }
1721 Some(s.clone())
1722 } else {
1723 None
1724 }
1725 })
1726 .collect();
1727
1728 // Check if we need to reflow this list item
1729 // We check the combined content to see if it exceeds length limits
1730 let combined_content = content_lines.join(" ").trim().to_string();
1731
1732 // Helper to check if we should reflow in normalize mode
1733 let should_normalize = || {
1734 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1735 // DO normalize if it has plain text content that spans multiple lines
1736 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1737 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1738 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1739 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1740 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1741 let has_admonitions = blocks.iter().any(|b| matches!(b, Block::Admonition { .. }));
1742 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1743
1744 // If we have structural blocks but no paragraphs, don't normalize
1745 if (has_nested_lists
1746 || has_code_blocks
1747 || has_semantic_lines
1748 || has_snippet_lines
1749 || has_div_markers
1750 || has_admonitions)
1751 && !has_paragraphs
1752 {
1753 return false;
1754 }
1755
1756 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1757 if has_paragraphs {
1758 // Count only paragraphs that contain at least one non-exempt line.
1759 // Paragraphs consisting entirely of link ref defs or standalone links
1760 // should not trigger normalization.
1761 let paragraph_count = blocks
1762 .iter()
1763 .filter(|b| {
1764 if let Block::Paragraph(para_lines) = b {
1765 !para_lines.iter().all(|line| is_exempt_line(line))
1766 } else {
1767 false
1768 }
1769 })
1770 .count();
1771 if paragraph_count > 1 {
1772 // Multiple non-exempt paragraph blocks should be normalized
1773 return true;
1774 }
1775
1776 // Single paragraph block: normalize if it has multiple content lines
1777 if content_lines.len() > 1 {
1778 return true;
1779 }
1780 }
1781
1782 false
1783 };
1784
1785 let needs_reflow = match config.reflow_mode {
1786 ReflowMode::Normalize => {
1787 // Only reflow if:
1788 // 1. Any non-exempt paragraph, when joined, exceeds the limit, OR
1789 // 2. Any admonition content line exceeds the limit, OR
1790 // 3. The list item should be normalized (has multi-line plain text)
1791 let any_paragraph_exceeds = blocks.iter().any(|block| match block {
1792 Block::Paragraph(para_lines) => {
1793 if para_lines.iter().all(|line| is_exempt_line(line)) {
1794 return false;
1795 }
1796 let joined = para_lines.join(" ");
1797 let with_marker = format!("{}{}", " ".repeat(indent_size), joined.trim());
1798 self.calculate_effective_length(&with_marker) > config.line_length.get()
1799 }
1800 Block::Admonition {
1801 content_lines,
1802 header_indent,
1803 ..
1804 } => content_lines.iter().any(|(content, indent)| {
1805 if content.is_empty() {
1806 return false;
1807 }
1808 let with_indent = format!("{}{}", " ".repeat(*indent.max(header_indent)), content);
1809 self.calculate_effective_length(&with_indent) > config.line_length.get()
1810 }),
1811 _ => false,
1812 });
1813 if any_paragraph_exceeds {
1814 true
1815 } else {
1816 should_normalize()
1817 }
1818 }
1819 ReflowMode::SentencePerLine => {
1820 // Check if list item has multiple sentences
1821 let sentences = split_into_sentences(&combined_content);
1822 sentences.len() > 1
1823 }
1824 ReflowMode::SemanticLineBreaks => {
1825 let sentences = split_into_sentences(&combined_content);
1826 sentences.len() > 1
1827 || (list_start..i).any(|line_idx| {
1828 let line = lines[line_idx];
1829 let trimmed = line.trim();
1830 if trimmed.is_empty() || is_exempt_line(line) {
1831 return false;
1832 }
1833 self.calculate_effective_length(line) > config.line_length.get()
1834 })
1835 }
1836 ReflowMode::Default => {
1837 // In default mode, only reflow if any individual non-exempt line exceeds limit
1838 (list_start..i).any(|line_idx| {
1839 let line = lines[line_idx];
1840 let trimmed = line.trim();
1841 // Skip blank lines and exempt lines
1842 if trimmed.is_empty() || is_exempt_line(line) {
1843 return false;
1844 }
1845 self.calculate_effective_length(line) > config.line_length.get()
1846 })
1847 }
1848 };
1849
1850 if needs_reflow {
1851 let start_range = line_index.whole_line_range(list_start + 1);
1852 let end_line = i - 1;
1853 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1854 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1855 } else {
1856 line_index.whole_line_range(end_line + 1)
1857 };
1858 let byte_range = start_range.start..end_range.end;
1859
1860 // Reflow each block (paragraphs only, preserve code blocks)
1861 // When line_length = 0 (no limit), use a very large value for reflow
1862 let reflow_line_length = if config.line_length.is_unlimited() {
1863 usize::MAX
1864 } else {
1865 config.line_length.get().saturating_sub(indent_size).max(1)
1866 };
1867 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1868 line_length: reflow_line_length,
1869 break_on_sentences: true,
1870 preserve_breaks: false,
1871 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1872 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1873 abbreviations: config.abbreviations_for_reflow(),
1874 length_mode: self.reflow_length_mode(),
1875 };
1876
1877 let mut result: Vec<String> = Vec::new();
1878 let mut is_first_block = true;
1879
1880 for (block_idx, block) in blocks.iter().enumerate() {
1881 match block {
1882 Block::Paragraph(para_lines) => {
1883 // If every line in this paragraph is exempt (link ref defs,
1884 // standalone links), preserve the paragraph verbatim instead
1885 // of reflowing it. Reflowing would corrupt link ref defs.
1886 let all_exempt = para_lines.iter().all(|line| is_exempt_line(line));
1887
1888 if all_exempt {
1889 for (idx, line) in para_lines.iter().enumerate() {
1890 if is_first_block && idx == 0 {
1891 result.push(format!("{marker}{line}"));
1892 is_first_block = false;
1893 } else {
1894 result.push(format!("{expected_indent}{line}"));
1895 }
1896 }
1897 } else {
1898 // Split the paragraph into segments at hard break boundaries
1899 // Each segment can be reflowed independently
1900 let segments = split_into_segments(para_lines);
1901
1902 for (segment_idx, segment) in segments.iter().enumerate() {
1903 // Check if this segment ends with a hard break and what type
1904 let hard_break_type = segment.last().and_then(|line| {
1905 let line = line.strip_suffix('\r').unwrap_or(line);
1906 if line.ends_with('\\') {
1907 Some("\\")
1908 } else if line.ends_with(" ") {
1909 Some(" ")
1910 } else {
1911 None
1912 }
1913 });
1914
1915 // Join and reflow the segment (removing the hard break marker for processing)
1916 let segment_for_reflow: Vec<String> = segment
1917 .iter()
1918 .map(|line| {
1919 // Strip hard break marker (2 spaces or backslash) for reflow processing
1920 if line.ends_with('\\') {
1921 line[..line.len() - 1].trim_end().to_string()
1922 } else if line.ends_with(" ") {
1923 line[..line.len() - 2].trim_end().to_string()
1924 } else {
1925 line.clone()
1926 }
1927 })
1928 .collect();
1929
1930 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1931 if !segment_text.is_empty() {
1932 let reflowed =
1933 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1934
1935 if is_first_block && segment_idx == 0 {
1936 // First segment of first block starts with marker
1937 result.push(format!("{marker}{}", reflowed[0]));
1938 for line in reflowed.iter().skip(1) {
1939 result.push(format!("{expected_indent}{line}"));
1940 }
1941 is_first_block = false;
1942 } else {
1943 // Subsequent segments
1944 for line in reflowed {
1945 result.push(format!("{expected_indent}{line}"));
1946 }
1947 }
1948
1949 // If this segment had a hard break, add it back to the last line
1950 // Preserve the original hard break format (backslash or two spaces)
1951 if let Some(break_marker) = hard_break_type
1952 && let Some(last_line) = result.last_mut()
1953 {
1954 last_line.push_str(break_marker);
1955 }
1956 }
1957 }
1958 }
1959
1960 // Add blank line after paragraph block if there's a next block.
1961 // Check if next block is a code block that doesn't want a preceding blank.
1962 // Also don't add blank lines before snippet lines (they should stay tight).
1963 // Only add if not already ending with one (avoids double blanks).
1964 if block_idx < blocks.len() - 1 {
1965 let next_block = &blocks[block_idx + 1];
1966 let should_add_blank = match next_block {
1967 Block::Code {
1968 has_preceding_blank, ..
1969 } => *has_preceding_blank,
1970 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1971 _ => true, // For all other blocks, add blank line
1972 };
1973 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
1974 {
1975 result.push(String::new());
1976 }
1977 }
1978 }
1979 Block::Code {
1980 lines: code_lines,
1981 has_preceding_blank: _,
1982 } => {
1983 // Preserve code blocks as-is with original indentation
1984 // NOTE: Blank line before code block is handled by the previous block
1985 // (see paragraph block's logic above)
1986
1987 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1988 if is_first_block && idx == 0 {
1989 // First line of first block gets marker
1990 result.push(format!(
1991 "{marker}{}",
1992 " ".repeat(orig_indent - marker_len) + content
1993 ));
1994 is_first_block = false;
1995 } else if content.is_empty() {
1996 result.push(String::new());
1997 } else {
1998 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1999 }
2000 }
2001 }
2002 Block::NestedList(nested_items) => {
2003 // Preserve nested list items as-is with original indentation.
2004 // Only add blank before if not already ending with one (avoids
2005 // double blanks when the preceding block already added one).
2006 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
2007 result.push(String::new());
2008 }
2009
2010 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
2011 if is_first_block && idx == 0 {
2012 // First line of first block gets marker
2013 result.push(format!(
2014 "{marker}{}",
2015 " ".repeat(orig_indent - marker_len) + content
2016 ));
2017 is_first_block = false;
2018 } else if content.is_empty() {
2019 result.push(String::new());
2020 } else {
2021 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
2022 }
2023 }
2024
2025 // Add blank line after nested list if there's a next block.
2026 // Only add if not already ending with one (avoids double blanks
2027 // when the last nested item was already a blank line).
2028 if block_idx < blocks.len() - 1 {
2029 let next_block = &blocks[block_idx + 1];
2030 let should_add_blank = match next_block {
2031 Block::Code {
2032 has_preceding_blank, ..
2033 } => *has_preceding_blank,
2034 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2035 _ => true, // For all other blocks, add blank line
2036 };
2037 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2038 {
2039 result.push(String::new());
2040 }
2041 }
2042 }
2043 Block::SemanticLine(content) => {
2044 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line.
2045 // Only add blank before if not already ending with one.
2046 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
2047 result.push(String::new());
2048 }
2049
2050 if is_first_block {
2051 // First block starts with marker
2052 result.push(format!("{marker}{content}"));
2053 is_first_block = false;
2054 } else {
2055 // Subsequent blocks use expected indent
2056 result.push(format!("{expected_indent}{content}"));
2057 }
2058
2059 // Add blank line after semantic line if there's a next block.
2060 // Only add if not already ending with one.
2061 if block_idx < blocks.len() - 1 {
2062 let next_block = &blocks[block_idx + 1];
2063 let should_add_blank = match next_block {
2064 Block::Code {
2065 has_preceding_blank, ..
2066 } => *has_preceding_blank,
2067 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2068 _ => true, // For all other blocks, add blank line
2069 };
2070 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2071 {
2072 result.push(String::new());
2073 }
2074 }
2075 }
2076 Block::SnippetLine(content) => {
2077 // Preserve snippet delimiters (-8<-) as-is on their own line
2078 // Unlike semantic lines, snippet lines don't add extra blank lines
2079 if is_first_block {
2080 // First block starts with marker
2081 result.push(format!("{marker}{content}"));
2082 is_first_block = false;
2083 } else {
2084 // Subsequent blocks use expected indent
2085 result.push(format!("{expected_indent}{content}"));
2086 }
2087 // No blank lines added before or after snippet delimiters
2088 }
2089 Block::DivMarker(content) => {
2090 // Preserve div markers (::: opening or closing) as-is on their own line
2091 if is_first_block {
2092 result.push(format!("{marker}{content}"));
2093 is_first_block = false;
2094 } else {
2095 result.push(format!("{expected_indent}{content}"));
2096 }
2097 }
2098 Block::Html {
2099 lines: html_lines,
2100 has_preceding_blank: _,
2101 } => {
2102 // Preserve HTML blocks exactly as-is with original indentation
2103 // NOTE: Blank line before HTML block is handled by the previous block
2104
2105 for (idx, line) in html_lines.iter().enumerate() {
2106 if is_first_block && idx == 0 {
2107 // First line of first block gets marker
2108 result.push(format!("{marker}{line}"));
2109 is_first_block = false;
2110 } else if line.is_empty() {
2111 // Preserve blank lines inside HTML blocks
2112 result.push(String::new());
2113 } else {
2114 // Preserve lines with their original content (already includes indentation)
2115 result.push(format!("{expected_indent}{line}"));
2116 }
2117 }
2118
2119 // Add blank line after HTML block if there's a next block.
2120 // Only add if not already ending with one (avoids double blanks
2121 // when the HTML block itself contained a trailing blank line).
2122 if block_idx < blocks.len() - 1 {
2123 let next_block = &blocks[block_idx + 1];
2124 let should_add_blank = match next_block {
2125 Block::Code {
2126 has_preceding_blank, ..
2127 } => *has_preceding_blank,
2128 Block::Html {
2129 has_preceding_blank, ..
2130 } => *has_preceding_blank,
2131 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2132 _ => true, // For all other blocks, add blank line
2133 };
2134 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2135 {
2136 result.push(String::new());
2137 }
2138 }
2139 }
2140 Block::Admonition {
2141 header,
2142 header_indent,
2143 content_lines: admon_lines,
2144 } => {
2145 // Reconstruct admonition block with header at original indent
2146 // and body content reflowed to fit within the line length limit
2147
2148 // Add blank line before admonition if not first block
2149 if !is_first_block && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true) {
2150 result.push(String::new());
2151 }
2152
2153 // Output the header at its original indent
2154 let header_indent_str = " ".repeat(*header_indent);
2155 if is_first_block {
2156 result.push(format!(
2157 "{marker}{}",
2158 " ".repeat(header_indent.saturating_sub(marker_len)) + header
2159 ));
2160 is_first_block = false;
2161 } else {
2162 result.push(format!("{header_indent_str}{header}"));
2163 }
2164
2165 // Derive body indent from the first non-empty content line's
2166 // stored indent, falling back to header_indent + 4 for
2167 // empty-body admonitions
2168 let body_indent = admon_lines
2169 .iter()
2170 .find(|(content, _)| !content.is_empty())
2171 .map(|(_, indent)| *indent)
2172 .unwrap_or(header_indent + 4);
2173 let body_indent_str = " ".repeat(body_indent);
2174
2175 // Collect body content into paragraphs separated by blank lines
2176 let mut body_paragraphs: Vec<Vec<String>> = Vec::new();
2177 let mut current_para: Vec<String> = Vec::new();
2178
2179 for (content, _orig_indent) in admon_lines {
2180 if content.is_empty() {
2181 if !current_para.is_empty() {
2182 body_paragraphs.push(current_para.clone());
2183 current_para.clear();
2184 }
2185 } else {
2186 current_para.push(content.clone());
2187 }
2188 }
2189 if !current_para.is_empty() {
2190 body_paragraphs.push(current_para);
2191 }
2192
2193 // Reflow each paragraph in the body
2194 for paragraph in &body_paragraphs {
2195 // Add blank line before each paragraph (including the first, after the header)
2196 result.push(String::new());
2197
2198 let paragraph_text = paragraph.join(" ").trim().to_string();
2199 if paragraph_text.is_empty() {
2200 continue;
2201 }
2202
2203 // Reflow with adjusted line length
2204 let admon_reflow_length = if config.line_length.is_unlimited() {
2205 usize::MAX
2206 } else {
2207 config.line_length.get().saturating_sub(body_indent).max(1)
2208 };
2209
2210 let admon_reflow_options = crate::utils::text_reflow::ReflowOptions {
2211 line_length: admon_reflow_length,
2212 break_on_sentences: true,
2213 preserve_breaks: false,
2214 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2215 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2216 abbreviations: config.abbreviations_for_reflow(),
2217 length_mode: self.reflow_length_mode(),
2218 };
2219
2220 let reflowed =
2221 crate::utils::text_reflow::reflow_line(¶graph_text, &admon_reflow_options);
2222 for line in &reflowed {
2223 result.push(format!("{body_indent_str}{line}"));
2224 }
2225 }
2226
2227 // Add blank line after admonition if there's a next block
2228 if block_idx < blocks.len() - 1 {
2229 let next_block = &blocks[block_idx + 1];
2230 let should_add_blank = match next_block {
2231 Block::Code {
2232 has_preceding_blank, ..
2233 } => *has_preceding_blank,
2234 Block::SnippetLine(_) | Block::DivMarker(_) => false,
2235 _ => true,
2236 };
2237 if should_add_blank && result.last().map(|s: &String| !s.is_empty()).unwrap_or(true)
2238 {
2239 result.push(String::new());
2240 }
2241 }
2242 }
2243 }
2244 }
2245
2246 let reflowed_text = result.join(line_ending);
2247
2248 // Preserve trailing newline
2249 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2250 format!("{reflowed_text}{line_ending}")
2251 } else {
2252 reflowed_text
2253 };
2254
2255 // Get the original text to compare
2256 let original_text = &ctx.content[byte_range.clone()];
2257
2258 // Only generate a warning if the replacement is different from the original
2259 if original_text != replacement {
2260 // Generate an appropriate message based on why reflow is needed
2261 let message = match config.reflow_mode {
2262 ReflowMode::SentencePerLine => {
2263 let num_sentences = split_into_sentences(&combined_content).len();
2264 let num_lines = content_lines.len();
2265 if num_lines == 1 {
2266 // Single line with multiple sentences
2267 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2268 } else {
2269 // Multiple lines - could be split sentences or mixed
2270 format!(
2271 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
2272 )
2273 }
2274 }
2275 ReflowMode::SemanticLineBreaks => {
2276 let num_sentences = split_into_sentences(&combined_content).len();
2277 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
2278 }
2279 ReflowMode::Normalize => {
2280 // Find the longest non-exempt paragraph when joined
2281 let max_para_length = blocks
2282 .iter()
2283 .filter_map(|block| {
2284 if let Block::Paragraph(para_lines) = block {
2285 if para_lines.iter().all(|line| is_exempt_line(line)) {
2286 return None;
2287 }
2288 let joined = para_lines.join(" ");
2289 let with_indent = format!("{}{}", " ".repeat(indent_size), joined.trim());
2290 Some(self.calculate_effective_length(&with_indent))
2291 } else {
2292 None
2293 }
2294 })
2295 .max()
2296 .unwrap_or(0);
2297 if max_para_length > config.line_length.get() {
2298 format!(
2299 "Line length {} exceeds {} characters",
2300 max_para_length,
2301 config.line_length.get()
2302 )
2303 } else {
2304 "Multi-line content can be normalized".to_string()
2305 }
2306 }
2307 ReflowMode::Default => {
2308 // Report the actual longest non-exempt line, not the combined content
2309 let max_length = (list_start..i)
2310 .filter(|&line_idx| {
2311 let line = lines[line_idx];
2312 let trimmed = line.trim();
2313 !trimmed.is_empty() && !is_exempt_line(line)
2314 })
2315 .map(|line_idx| self.calculate_effective_length(lines[line_idx]))
2316 .max()
2317 .unwrap_or(0);
2318 format!(
2319 "Line length {} exceeds {} characters",
2320 max_length,
2321 config.line_length.get()
2322 )
2323 }
2324 };
2325
2326 warnings.push(LintWarning {
2327 rule_name: Some(self.name().to_string()),
2328 message,
2329 line: list_start + 1,
2330 column: 1,
2331 end_line: end_line + 1,
2332 end_column: lines[end_line].len() + 1,
2333 severity: Severity::Warning,
2334 fix: Some(crate::rule::Fix {
2335 range: byte_range,
2336 replacement,
2337 }),
2338 });
2339 }
2340 }
2341 continue;
2342 }
2343
2344 // Found start of a paragraph - collect all lines in it
2345 let paragraph_start = i;
2346 let mut paragraph_lines = vec![lines[i]];
2347 i += 1;
2348
2349 while i < lines.len() {
2350 let next_line = lines[i];
2351 let next_line_num = i + 1;
2352 let next_trimmed = next_line.trim();
2353
2354 // Stop at paragraph boundaries
2355 if next_trimmed.is_empty()
2356 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
2357 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
2358 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
2359 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
2360 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
2361 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
2362 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
2363 || ctx
2364 .line_info(next_line_num)
2365 .is_some_and(|info| info.in_mkdocs_container())
2366 || (next_line_num > 0
2367 && next_line_num <= ctx.lines.len()
2368 && ctx.lines[next_line_num - 1].blockquote.is_some())
2369 || next_trimmed.starts_with('#')
2370 || TableUtils::is_potential_table_row(next_line)
2371 || is_list_item(next_trimmed)
2372 || is_horizontal_rule(next_trimmed)
2373 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
2374 || is_template_directive_only(next_line)
2375 || is_standalone_attr_list(next_line)
2376 || is_snippet_block_delimiter(next_line)
2377 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
2378 {
2379 break;
2380 }
2381
2382 // Check if the previous line ends with a hard break (2+ spaces or backslash)
2383 if i > 0 && has_hard_break(lines[i - 1]) {
2384 // Don't include lines after hard breaks in the same paragraph
2385 break;
2386 }
2387
2388 paragraph_lines.push(next_line);
2389 i += 1;
2390 }
2391
2392 // Combine paragraph lines into a single string for processing
2393 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
2394 let paragraph_text = paragraph_lines.join(" ");
2395
2396 // Skip reflowing if this paragraph contains definition list items
2397 // Definition lists are multi-line structures that should not be joined
2398 let contains_definition_list = paragraph_lines
2399 .iter()
2400 .any(|line| crate::utils::is_definition_list_item(line));
2401
2402 if contains_definition_list {
2403 // Don't reflow definition lists - skip this paragraph
2404 i = paragraph_start + paragraph_lines.len();
2405 continue;
2406 }
2407
2408 // Skip reflowing if this paragraph contains MkDocs Snippets markers
2409 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
2410 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
2411
2412 if contains_snippets {
2413 // Don't reflow Snippets blocks - skip this paragraph
2414 i = paragraph_start + paragraph_lines.len();
2415 continue;
2416 }
2417
2418 // Check if this paragraph needs reflowing
2419 let needs_reflow = match config.reflow_mode {
2420 ReflowMode::Normalize => {
2421 // In normalize mode, reflow multi-line paragraphs
2422 paragraph_lines.len() > 1
2423 }
2424 ReflowMode::SentencePerLine => {
2425 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
2426 // Note: we check the joined text because sentences can span multiple lines
2427 let sentences = split_into_sentences(¶graph_text);
2428
2429 // Always reflow if multiple sentences on one line
2430 if sentences.len() > 1 {
2431 true
2432 } else if paragraph_lines.len() > 1 {
2433 // For single-sentence paragraphs spanning multiple lines:
2434 // Reflow if they COULD fit on one line (respecting line-length constraint)
2435 if config.line_length.is_unlimited() {
2436 // No line-length constraint - always join single sentences
2437 true
2438 } else {
2439 // Only join if it fits within line-length
2440 let effective_length = self.calculate_effective_length(¶graph_text);
2441 effective_length <= config.line_length.get()
2442 }
2443 } else {
2444 false
2445 }
2446 }
2447 ReflowMode::SemanticLineBreaks => {
2448 let sentences = split_into_sentences(¶graph_text);
2449 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
2450 sentences.len() > 1
2451 || paragraph_lines.len() > 1
2452 || paragraph_lines
2453 .iter()
2454 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2455 }
2456 ReflowMode::Default => {
2457 // In default mode, only reflow if lines exceed limit
2458 paragraph_lines
2459 .iter()
2460 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
2461 }
2462 };
2463
2464 if needs_reflow {
2465 // Calculate byte range for this paragraph
2466 // Use whole_line_range for each line and combine
2467 let start_range = line_index.whole_line_range(paragraph_start + 1);
2468 let end_line = paragraph_start + paragraph_lines.len() - 1;
2469
2470 // For the last line, we want to preserve any trailing newline
2471 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
2472 // Last line without trailing newline - use line_text_range
2473 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
2474 } else {
2475 // Not the last line or has trailing newline - use whole_line_range
2476 line_index.whole_line_range(end_line + 1)
2477 };
2478
2479 let byte_range = start_range.start..end_range.end;
2480
2481 // Check if the paragraph ends with a hard break and what type
2482 let hard_break_type = paragraph_lines.last().and_then(|line| {
2483 let line = line.strip_suffix('\r').unwrap_or(line);
2484 if line.ends_with('\\') {
2485 Some("\\")
2486 } else if line.ends_with(" ") {
2487 Some(" ")
2488 } else {
2489 None
2490 }
2491 });
2492
2493 // Reflow the paragraph
2494 // When line_length = 0 (no limit), use a very large value for reflow
2495 let reflow_line_length = if config.line_length.is_unlimited() {
2496 usize::MAX
2497 } else {
2498 config.line_length.get()
2499 };
2500 let reflow_options = crate::utils::text_reflow::ReflowOptions {
2501 line_length: reflow_line_length,
2502 break_on_sentences: true,
2503 preserve_breaks: false,
2504 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
2505 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
2506 abbreviations: config.abbreviations_for_reflow(),
2507 length_mode: self.reflow_length_mode(),
2508 };
2509 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
2510
2511 // If the original paragraph ended with a hard break, preserve it
2512 // Preserve the original hard break format (backslash or two spaces)
2513 if let Some(break_marker) = hard_break_type
2514 && !reflowed.is_empty()
2515 {
2516 let last_idx = reflowed.len() - 1;
2517 if !has_hard_break(&reflowed[last_idx]) {
2518 reflowed[last_idx].push_str(break_marker);
2519 }
2520 }
2521
2522 let reflowed_text = reflowed.join(line_ending);
2523
2524 // Preserve trailing newline if the original paragraph had one
2525 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
2526 format!("{reflowed_text}{line_ending}")
2527 } else {
2528 reflowed_text
2529 };
2530
2531 // Get the original text to compare
2532 let original_text = &ctx.content[byte_range.clone()];
2533
2534 // Only generate a warning if the replacement is different from the original
2535 if original_text != replacement {
2536 // Create warning with actual fix
2537 // In default mode, report the specific line that violates
2538 // In normalize mode, report the whole paragraph
2539 // In sentence-per-line mode, report the entire paragraph
2540 let (warning_line, warning_end_line) = match config.reflow_mode {
2541 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
2542 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
2543 // Highlight the entire paragraph that needs reformatting
2544 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
2545 }
2546 ReflowMode::Default => {
2547 // Find the first line that exceeds the limit
2548 let mut violating_line = paragraph_start;
2549 for (idx, line) in paragraph_lines.iter().enumerate() {
2550 if self.calculate_effective_length(line) > config.line_length.get() {
2551 violating_line = paragraph_start + idx;
2552 break;
2553 }
2554 }
2555 (violating_line + 1, violating_line + 1)
2556 }
2557 };
2558
2559 warnings.push(LintWarning {
2560 rule_name: Some(self.name().to_string()),
2561 message: match config.reflow_mode {
2562 ReflowMode::Normalize => format!(
2563 "Paragraph could be normalized to use line length of {} characters",
2564 config.line_length.get()
2565 ),
2566 ReflowMode::SentencePerLine => {
2567 let num_sentences = split_into_sentences(¶graph_text).len();
2568 if paragraph_lines.len() == 1 {
2569 // Single line with multiple sentences
2570 format!("Line contains {num_sentences} sentences (one sentence per line required)")
2571 } else {
2572 let num_lines = paragraph_lines.len();
2573 // Multiple lines - could be split sentences or mixed
2574 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
2575 }
2576 },
2577 ReflowMode::SemanticLineBreaks => {
2578 let num_sentences = split_into_sentences(¶graph_text).len();
2579 format!(
2580 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
2581 )
2582 },
2583 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
2584 },
2585 line: warning_line,
2586 column: 1,
2587 end_line: warning_end_line,
2588 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
2589 severity: Severity::Warning,
2590 fix: Some(crate::rule::Fix {
2591 range: byte_range,
2592 replacement,
2593 }),
2594 });
2595 }
2596 }
2597 }
2598
2599 warnings
2600 }
2601
2602 /// Calculate string length based on the configured length mode
2603 fn calculate_string_length(&self, s: &str) -> usize {
2604 match self.config.length_mode {
2605 LengthMode::Chars => s.chars().count(),
2606 LengthMode::Visual => s.width(),
2607 LengthMode::Bytes => s.len(),
2608 }
2609 }
2610
2611 /// Calculate effective line length
2612 ///
2613 /// Returns the actual display length of the line using the configured length mode.
2614 fn calculate_effective_length(&self, line: &str) -> usize {
2615 self.calculate_string_length(line)
2616 }
2617
2618 /// Calculate line length with inline link/image URLs removed.
2619 ///
2620 /// For each inline link `[text](url)` or image `` on the line,
2621 /// computes the "savings" from removing the URL portion (keeping only `[text]`
2622 /// or `![alt]`). Returns `effective_length - total_savings`.
2623 ///
2624 /// Handles nested constructs (e.g., `[](url)`) by only counting the
2625 /// outermost construct to avoid double-counting.
2626 fn calculate_text_only_length(
2627 &self,
2628 effective_length: usize,
2629 line_number: usize,
2630 ctx: &crate::lint_context::LintContext,
2631 ) -> usize {
2632 let line_range = ctx.line_index.line_content_range(line_number);
2633 let line_byte_end = line_range.end;
2634
2635 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
2636 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
2637
2638 for link in &ctx.links {
2639 if link.line != line_number || link.is_reference {
2640 continue;
2641 }
2642 if !matches!(link.link_type, LinkType::Inline) {
2643 continue;
2644 }
2645 // Skip cross-line links
2646 if link.byte_end > line_byte_end {
2647 continue;
2648 }
2649 // `[text]` in configured length mode
2650 let text_only_len = 2 + self.calculate_string_length(&link.text);
2651 constructs.push((link.byte_offset, link.byte_end, text_only_len));
2652 }
2653
2654 for image in &ctx.images {
2655 if image.line != line_number || image.is_reference {
2656 continue;
2657 }
2658 if !matches!(image.link_type, LinkType::Inline) {
2659 continue;
2660 }
2661 // Skip cross-line images
2662 if image.byte_end > line_byte_end {
2663 continue;
2664 }
2665 // `![alt]` in configured length mode
2666 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
2667 constructs.push((image.byte_offset, image.byte_end, text_only_len));
2668 }
2669
2670 if constructs.is_empty() {
2671 return effective_length;
2672 }
2673
2674 // Sort by byte offset to handle overlapping/nested constructs
2675 constructs.sort_by_key(|&(start, _, _)| start);
2676
2677 let mut total_savings: usize = 0;
2678 let mut last_end: usize = 0;
2679
2680 for (start, end, text_only_len) in &constructs {
2681 // Skip constructs nested inside a previously counted one
2682 if *start < last_end {
2683 continue;
2684 }
2685 // Full construct length in configured length mode
2686 let full_source = &ctx.content[*start..*end];
2687 let full_len = self.calculate_string_length(full_source);
2688 total_savings += full_len.saturating_sub(*text_only_len);
2689 last_end = *end;
2690 }
2691
2692 effective_length.saturating_sub(total_savings)
2693 }
2694}