rumdl_lib/rules/md013_line_length/mod.rs
//! Rule MD013: Line length
//!
//! See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::mkdocs_admonitions;
7use crate::utils::mkdocs_attr_list::is_standalone_attr_list;
8use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
9use crate::utils::mkdocs_tabs;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::range_utils::calculate_excess_range;
12use crate::utils::regex_cache::{IMAGE_REF_PATTERN, LINK_REF_PATTERN, URL_PATTERN};
13use crate::utils::table_utils::TableUtils;
14use crate::utils::text_reflow::{ReflowLengthMode, split_into_sentences};
15use pulldown_cmark::LinkType;
16use toml;
17
18mod helpers;
19pub mod md013_config;
20use helpers::{
21 extract_list_marker_and_content, has_hard_break, is_horizontal_rule, is_list_item, is_template_directive_only,
22 split_into_segments, trim_preserving_hard_break,
23};
24pub use md013_config::MD013Config;
25use md013_config::{LengthMode, ReflowMode};
26
27#[cfg(test)]
28mod tests;
29use unicode_width::UnicodeWidthStr;
30
31#[derive(Clone, Default)]
32pub struct MD013LineLength {
33 pub(crate) config: MD013Config,
34}
35
36impl MD013LineLength {
37 pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
38 Self {
39 config: MD013Config {
40 line_length: crate::types::LineLength::new(line_length),
41 code_blocks,
42 tables,
43 headings,
44 paragraphs: true, // Default to true for backwards compatibility
45 strict,
46 reflow: false,
47 reflow_mode: ReflowMode::default(),
48 length_mode: LengthMode::default(),
49 abbreviations: Vec::new(),
50 },
51 }
52 }
53
54 pub fn from_config_struct(config: MD013Config) -> Self {
55 Self { config }
56 }
57
58 /// Convert MD013 LengthMode to text_reflow ReflowLengthMode
59 fn reflow_length_mode(&self) -> ReflowLengthMode {
60 match self.config.length_mode {
61 LengthMode::Chars => ReflowLengthMode::Chars,
62 LengthMode::Visual => ReflowLengthMode::Visual,
63 LengthMode::Bytes => ReflowLengthMode::Bytes,
64 }
65 }
66
67 fn should_ignore_line(
68 &self,
69 line: &str,
70 _lines: &[&str],
71 current_line: usize,
72 ctx: &crate::lint_context::LintContext,
73 ) -> bool {
74 if self.config.strict {
75 return false;
76 }
77
78 // Quick check for common patterns before expensive regex
79 let trimmed = line.trim();
80
81 // Only skip if the entire line is a URL (quick check first)
82 if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
83 return true;
84 }
85
86 // Only skip if the entire line is an image reference (quick check first)
87 if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
88 return true;
89 }
90
91 // Note: link reference definitions are handled as always-exempt (even in strict mode)
92 // in the main check loop, so they don't need to be checked here.
93
94 // Code blocks with long strings (only check if in code block)
95 if ctx.line_info(current_line + 1).is_some_and(|info| info.in_code_block)
96 && !trimmed.is_empty()
97 && !line.contains(' ')
98 && !line.contains('\t')
99 {
100 return true;
101 }
102
103 false
104 }
105
106 /// Check if rule should skip based on provided config (used for inline config support)
107 fn should_skip_with_config(&self, ctx: &crate::lint_context::LintContext, config: &MD013Config) -> bool {
108 // Skip if content is empty
109 if ctx.content.is_empty() {
110 return true;
111 }
112
113 // For sentence-per-line, semantic-line-breaks, or normalize mode, never skip based on line length
114 if config.reflow
115 && (config.reflow_mode == ReflowMode::SentencePerLine
116 || config.reflow_mode == ReflowMode::SemanticLineBreaks
117 || config.reflow_mode == ReflowMode::Normalize)
118 {
119 return false;
120 }
121
122 // Quick check: if total content is shorter than line limit, definitely skip
123 if ctx.content.len() <= config.line_length.get() {
124 return true;
125 }
126
127 // Skip if no line exceeds the limit
128 !ctx.lines.iter().any(|line| line.byte_len > config.line_length.get())
129 }
130}
131
132impl Rule for MD013LineLength {
133 fn name(&self) -> &'static str {
134 "MD013"
135 }
136
137 fn description(&self) -> &'static str {
138 "Line length should not be excessive"
139 }
140
141 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
142 // Use pre-parsed inline config from LintContext
143 let config_override = ctx.inline_config().get_rule_config("MD013");
144
145 // Apply configuration override if present
146 let effective_config = if let Some(json_config) = config_override {
147 if let Some(obj) = json_config.as_object() {
148 let mut config = self.config.clone();
149 if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
150 config.line_length = crate::types::LineLength::new(line_length as usize);
151 }
152 if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
153 config.code_blocks = code_blocks;
154 }
155 if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
156 config.tables = tables;
157 }
158 if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
159 config.headings = headings;
160 }
161 if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
162 config.strict = strict;
163 }
164 if let Some(reflow) = obj.get("reflow").and_then(|v| v.as_bool()) {
165 config.reflow = reflow;
166 }
167 if let Some(reflow_mode) = obj.get("reflow_mode").and_then(|v| v.as_str()) {
168 config.reflow_mode = match reflow_mode {
169 "default" => ReflowMode::Default,
170 "normalize" => ReflowMode::Normalize,
171 "sentence-per-line" => ReflowMode::SentencePerLine,
172 "semantic-line-breaks" => ReflowMode::SemanticLineBreaks,
173 _ => ReflowMode::default(),
174 };
175 }
176 config
177 } else {
178 self.config.clone()
179 }
180 } else {
181 self.config.clone()
182 };
183
184 // Fast early return using should_skip with EFFECTIVE config (after inline overrides)
185 // But don't skip if we're in reflow mode with Normalize or SentencePerLine
186 if self.should_skip_with_config(ctx, &effective_config)
187 && !(effective_config.reflow
188 && (effective_config.reflow_mode == ReflowMode::Normalize
189 || effective_config.reflow_mode == ReflowMode::SentencePerLine
190 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
191 {
192 return Ok(Vec::new());
193 }
194
195 // Direct implementation without DocumentStructure
196 let mut warnings = Vec::new();
197
198 // Special handling: line_length = 0 means "no line length limit"
199 // Skip all line length checks, but still allow reflow if enabled
200 let skip_length_checks = effective_config.line_length.is_unlimited();
201
202 // Pre-filter lines that could be problematic to avoid processing all lines
203 let mut candidate_lines = Vec::new();
204 if !skip_length_checks {
205 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
206 // Skip front matter - it should never be linted
207 if line_info.in_front_matter {
208 continue;
209 }
210
211 // Quick length check first
212 if line_info.byte_len > effective_config.line_length.get() {
213 candidate_lines.push(line_idx);
214 }
215 }
216 }
217
218 // If no candidate lines and not in normalize or sentence-per-line mode, early return
219 if candidate_lines.is_empty()
220 && !(effective_config.reflow
221 && (effective_config.reflow_mode == ReflowMode::Normalize
222 || effective_config.reflow_mode == ReflowMode::SentencePerLine
223 || effective_config.reflow_mode == ReflowMode::SemanticLineBreaks))
224 {
225 return Ok(warnings);
226 }
227
228 let lines = ctx.raw_lines();
229
230 // Create a quick lookup set for heading lines
231 // We need this for both the heading skip check AND the paragraphs check
232 let heading_lines_set: std::collections::HashSet<usize> = ctx
233 .lines
234 .iter()
235 .enumerate()
236 .filter(|(_, line)| line.heading.is_some())
237 .map(|(idx, _)| idx + 1)
238 .collect();
239
240 // Use pre-computed table blocks from context
241 // We need this for both the table skip check AND the paragraphs check
242 let table_blocks = &ctx.table_blocks;
243 let mut table_lines_set = std::collections::HashSet::new();
244 for table in table_blocks {
245 table_lines_set.insert(table.header_line + 1);
246 table_lines_set.insert(table.delimiter_line + 1);
247 for &line in &table.content_lines {
248 table_lines_set.insert(line + 1);
249 }
250 }
251
252 // Process candidate lines for line length checks
253 for &line_idx in &candidate_lines {
254 let line_number = line_idx + 1;
255 let line = lines[line_idx];
256
257 // Calculate actual line length (used in warning messages)
258 let effective_length = self.calculate_effective_length(line);
259
260 // Use single line length limit for all content
261 let line_limit = effective_config.line_length.get();
262
263 // In non-strict mode, forgive the trailing non-whitespace run.
264 // If the line only exceeds the limit because of a long token at the end
265 // (URL, link chain, identifier), it passes. This matches markdownlint's
266 // behavior: line.replace(/\S*$/u, "#")
267 let check_length = if effective_config.strict {
268 effective_length
269 } else {
270 match line.rfind(char::is_whitespace) {
271 Some(pos) => {
272 let ws_char = line[pos..].chars().next().unwrap();
273 let prefix_end = pos + ws_char.len_utf8();
274 self.calculate_string_length(&line[..prefix_end]) + 1
275 }
276 None => 1, // No whitespace — entire line is a single token
277 }
278 };
279
280 // Skip lines where the check length is within the limit
281 if check_length <= line_limit {
282 continue;
283 }
284
285 // Semantic link understanding: suppress when excess comes entirely from inline URLs
286 if !effective_config.strict {
287 let text_only_length = self.calculate_text_only_length(effective_length, line_number, ctx);
288 if text_only_length <= line_limit {
289 continue;
290 }
291 }
292
293 // Skip mkdocstrings blocks (already handled by LintContext)
294 if ctx.lines[line_idx].in_mkdocstrings {
295 continue;
296 }
297
298 // Link reference definitions are always exempt, even in strict mode.
299 // There's no way to shorten them without breaking the URL.
300 {
301 let trimmed = line.trim();
302 if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
303 continue;
304 }
305 }
306
307 // Skip various block types efficiently
308 if !effective_config.strict {
309 // Skip setext heading underlines
310 if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
311 continue;
312 }
313
314 // Skip block elements according to config flags
315 // The flags mean: true = check these elements, false = skip these elements
316 // So we skip when the flag is FALSE and the line is in that element type
317 if (!effective_config.headings && heading_lines_set.contains(&line_number))
318 || (!effective_config.code_blocks
319 && ctx.line_info(line_number).is_some_and(|info| info.in_code_block))
320 || (!effective_config.tables && table_lines_set.contains(&line_number))
321 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
322 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
323 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
324 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
325 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
326 {
327 continue;
328 }
329
330 // Check if this is a paragraph/regular text line
331 // If paragraphs = false, skip lines that are NOT in special blocks
332 if !effective_config.paragraphs {
333 let is_special_block = heading_lines_set.contains(&line_number)
334 || ctx.line_info(line_number).is_some_and(|info| info.in_code_block)
335 || table_lines_set.contains(&line_number)
336 || ctx.lines[line_number - 1].blockquote.is_some()
337 || ctx.line_info(line_number).is_some_and(|info| info.in_html_block)
338 || ctx.line_info(line_number).is_some_and(|info| info.in_html_comment)
339 || ctx.line_info(line_number).is_some_and(|info| info.in_esm_block)
340 || ctx.line_info(line_number).is_some_and(|info| info.in_jsx_expression)
341 || ctx.line_info(line_number).is_some_and(|info| info.in_mdx_comment)
342 || ctx
343 .line_info(line_number)
344 .is_some_and(|info| info.in_mkdocs_container());
345
346 // Skip regular paragraph text when paragraphs = false
347 if !is_special_block {
348 continue;
349 }
350 }
351
352 // Skip lines that are only a URL, image ref, or link ref
353 if self.should_ignore_line(line, lines, line_idx, ctx) {
354 continue;
355 }
356 }
357
358 // In sentence-per-line mode, check if this is a single long sentence
359 // If so, emit a warning without a fix (user must manually rephrase)
360 if effective_config.reflow_mode == ReflowMode::SentencePerLine {
361 let sentences = split_into_sentences(line.trim());
362 if sentences.len() == 1 {
363 // Single sentence that's too long - warn but don't auto-fix
364 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
365
366 let (start_line, start_col, end_line, end_col) =
367 calculate_excess_range(line_number, line, line_limit);
368
369 warnings.push(LintWarning {
370 rule_name: Some(self.name().to_string()),
371 message,
372 line: start_line,
373 column: start_col,
374 end_line,
375 end_column: end_col,
376 severity: Severity::Warning,
377 fix: None, // No auto-fix for long single sentences
378 });
379 continue;
380 }
381 // Multiple sentences will be handled by paragraph-based reflow
382 continue;
383 }
384
385 // In semantic-line-breaks mode, skip per-line checks —
386 // all reflow is handled at the paragraph level with cascading splits
387 if effective_config.reflow_mode == ReflowMode::SemanticLineBreaks {
388 continue;
389 }
390
391 // Don't provide fix for individual lines when reflow is enabled
392 // Paragraph-based fixes will be handled separately
393 let fix = None;
394
395 let message = format!("Line length {effective_length} exceeds {line_limit} characters");
396
397 // Calculate precise character range for the excess portion
398 let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
399
400 warnings.push(LintWarning {
401 rule_name: Some(self.name().to_string()),
402 message,
403 line: start_line,
404 column: start_col,
405 end_line,
406 end_column: end_col,
407 severity: Severity::Warning,
408 fix,
409 });
410 }
411
412 // If reflow is enabled, generate paragraph-based fixes
413 if effective_config.reflow {
414 let paragraph_warnings = self.generate_paragraph_fixes(ctx, &effective_config, lines);
415 // Merge paragraph warnings with line warnings, removing duplicates
416 for pw in paragraph_warnings {
417 // Remove any line warnings that overlap with this paragraph
418 warnings.retain(|w| w.line < pw.line || w.line > pw.end_line);
419 warnings.push(pw);
420 }
421 }
422
423 Ok(warnings)
424 }
425
426 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
427 // For CLI usage, apply fixes from warnings
428 // LSP will use the warning-based fixes directly
429 let warnings = self.check(ctx)?;
430
431 // If there are no fixes, return content unchanged
432 if !warnings.iter().any(|w| w.fix.is_some()) {
433 return Ok(ctx.content.to_string());
434 }
435
436 // Apply warning-based fixes
437 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
438 .map_err(|e| LintError::FixFailed(format!("Failed to apply fixes: {e}")))
439 }
440
441 fn as_any(&self) -> &dyn std::any::Any {
442 self
443 }
444
445 fn category(&self) -> RuleCategory {
446 RuleCategory::Whitespace
447 }
448
449 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
450 self.should_skip_with_config(ctx, &self.config)
451 }
452
453 fn default_config_section(&self) -> Option<(String, toml::Value)> {
454 let default_config = MD013Config::default();
455 let json_value = serde_json::to_value(&default_config).ok()?;
456 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
457
458 if let toml::Value::Table(table) = toml_value {
459 if !table.is_empty() {
460 Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
461 } else {
462 None
463 }
464 } else {
465 None
466 }
467 }
468
469 fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
470 let mut aliases = std::collections::HashMap::new();
471 aliases.insert("enable_reflow".to_string(), "reflow".to_string());
472 Some(aliases)
473 }
474
475 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
476 where
477 Self: Sized,
478 {
479 let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
480 // Use global line_length if rule-specific config still has default value
481 if rule_config.line_length.get() == 80 {
482 rule_config.line_length = config.global.line_length;
483 }
484 Box::new(Self::from_config_struct(rule_config))
485 }
486}
487
488impl MD013LineLength {
489 /// Generate paragraph-based fixes
490 fn generate_paragraph_fixes(
491 &self,
492 ctx: &crate::lint_context::LintContext,
493 config: &MD013Config,
494 lines: &[&str],
495 ) -> Vec<LintWarning> {
496 let mut warnings = Vec::new();
497 let line_index = LineIndex::new(ctx.content);
498
499 let mut i = 0;
500 while i < lines.len() {
501 let line_num = i + 1;
502
503 // Skip special structures (but NOT MkDocs containers - those get special handling)
504 let should_skip_due_to_line_info = ctx.line_info(line_num).is_some_and(|info| {
505 info.in_code_block
506 || info.in_front_matter
507 || info.in_html_block
508 || info.in_html_comment
509 || info.in_esm_block
510 || info.in_jsx_expression
511 || info.in_mdx_comment
512 || info.in_mkdocstrings
513 });
514
515 if should_skip_due_to_line_info
516 || (line_num > 0 && line_num <= ctx.lines.len() && ctx.lines[line_num - 1].blockquote.is_some())
517 || lines[i].trim().starts_with('#')
518 || TableUtils::is_potential_table_row(lines[i])
519 || lines[i].trim().is_empty()
520 || is_horizontal_rule(lines[i].trim())
521 || is_template_directive_only(lines[i])
522 || (lines[i].trim().starts_with('[') && lines[i].contains("]:"))
523 || ctx.line_info(line_num).is_some_and(|info| info.is_div_marker)
524 {
525 i += 1;
526 continue;
527 }
528
529 // Handle MkDocs container content (admonitions and tabs) with indent-preserving reflow
530 if ctx.line_info(line_num).is_some_and(|info| info.in_mkdocs_container()) {
531 // Skip admonition/tab marker lines — only reflow their indented content
532 let current_line = lines[i];
533 if mkdocs_admonitions::is_admonition_start(current_line) || mkdocs_tabs::is_tab_marker(current_line) {
534 i += 1;
535 continue;
536 }
537
538 let container_start = i;
539
540 // Detect the actual indent level from the first content line
541 // (supports nested admonitions with 8+ spaces)
542 let first_line = lines[i];
543 let base_indent_len = first_line.len() - first_line.trim_start().len();
544 let base_indent: String = " ".repeat(base_indent_len);
545
546 // Collect consecutive MkDocs container paragraph lines
547 let mut container_lines: Vec<&str> = Vec::new();
548 while i < lines.len() {
549 let current_line_num = i + 1;
550 let line_info = ctx.line_info(current_line_num);
551
552 // Stop if we leave the MkDocs container
553 if !line_info.is_some_and(|info| info.in_mkdocs_container()) {
554 break;
555 }
556
557 let line = lines[i];
558
559 // Stop at paragraph boundaries within the container
560 if line.trim().is_empty() {
561 break;
562 }
563
564 // Skip list items, code blocks, headings within containers
565 if is_list_item(line.trim())
566 || line.trim().starts_with("```")
567 || line.trim().starts_with("~~~")
568 || line.trim().starts_with('#')
569 {
570 break;
571 }
572
573 container_lines.push(line);
574 i += 1;
575 }
576
577 if container_lines.is_empty() {
578 // Must advance i to avoid infinite loop when we encounter
579 // non-paragraph content (code block, list, heading, empty line)
580 // at the start of an MkDocs container
581 i += 1;
582 continue;
583 }
584
585 // Strip the base indent from each line and join for reflow
586 let stripped_lines: Vec<&str> = container_lines
587 .iter()
588 .map(|line| {
589 if line.starts_with(&base_indent) {
590 &line[base_indent_len..]
591 } else {
592 line.trim_start()
593 }
594 })
595 .collect();
596 let paragraph_text = stripped_lines.join(" ");
597
598 // Check if reflow is needed
599 let needs_reflow = match config.reflow_mode {
600 ReflowMode::Normalize => container_lines.len() > 1,
601 ReflowMode::SentencePerLine => {
602 let sentences = split_into_sentences(¶graph_text);
603 sentences.len() > 1 || container_lines.len() > 1
604 }
605 ReflowMode::SemanticLineBreaks => {
606 let sentences = split_into_sentences(¶graph_text);
607 sentences.len() > 1
608 || container_lines.len() > 1
609 || container_lines
610 .iter()
611 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
612 }
613 ReflowMode::Default => container_lines
614 .iter()
615 .any(|line| self.calculate_effective_length(line) > config.line_length.get()),
616 };
617
618 if !needs_reflow {
619 continue;
620 }
621
622 // Calculate byte range for this container paragraph
623 let start_range = line_index.whole_line_range(container_start + 1);
624 let end_line = container_start + container_lines.len() - 1;
625 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
626 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
627 } else {
628 line_index.whole_line_range(end_line + 1)
629 };
630 let byte_range = start_range.start..end_range.end;
631
632 // Reflow with adjusted line length (accounting for the 4-space indent)
633 let reflow_line_length = if config.line_length.is_unlimited() {
634 usize::MAX
635 } else {
636 config.line_length.get().saturating_sub(base_indent_len).max(1)
637 };
638 let reflow_options = crate::utils::text_reflow::ReflowOptions {
639 line_length: reflow_line_length,
640 break_on_sentences: true,
641 preserve_breaks: false,
642 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
643 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
644 abbreviations: config.abbreviations_for_reflow(),
645 length_mode: self.reflow_length_mode(),
646 };
647 let reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
648
649 // Re-add the 4-space indent to each reflowed line
650 let reflowed_with_indent: Vec<String> =
651 reflowed.iter().map(|line| format!("{base_indent}{line}")).collect();
652 let reflowed_text = reflowed_with_indent.join("\n");
653
654 // Preserve trailing newline
655 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
656 format!("{reflowed_text}\n")
657 } else {
658 reflowed_text
659 };
660
661 // Only generate a warning if the replacement is different
662 let original_text = &ctx.content[byte_range.clone()];
663 if original_text != replacement {
664 warnings.push(LintWarning {
665 rule_name: Some(self.name().to_string()),
666 message: format!(
667 "Line length {} exceeds {} characters (in MkDocs container)",
668 container_lines.iter().map(|l| l.len()).max().unwrap_or(0),
669 config.line_length.get()
670 ),
671 line: container_start + 1,
672 column: 1,
673 end_line: end_line + 1,
674 end_column: lines[end_line].len() + 1,
675 severity: Severity::Warning,
676 fix: Some(crate::rule::Fix {
677 range: byte_range,
678 replacement,
679 }),
680 });
681 }
682 continue;
683 }
684
685 // Helper function to detect semantic line markers
686 let is_semantic_line = |content: &str| -> bool {
687 let trimmed = content.trim_start();
688 let semantic_markers = [
689 "NOTE:",
690 "WARNING:",
691 "IMPORTANT:",
692 "CAUTION:",
693 "TIP:",
694 "DANGER:",
695 "HINT:",
696 "INFO:",
697 ];
698 semantic_markers.iter().any(|marker| trimmed.starts_with(marker))
699 };
700
701 // Helper function to detect fence markers (opening or closing)
702 let is_fence_marker = |content: &str| -> bool {
703 let trimmed = content.trim_start();
704 trimmed.starts_with("```") || trimmed.starts_with("~~~")
705 };
706
707 // Check if this is a list item - handle it specially
708 let trimmed = lines[i].trim();
709 if is_list_item(trimmed) {
710 // Collect the entire list item including continuation lines
711 let list_start = i;
712 let (marker, first_content) = extract_list_marker_and_content(lines[i]);
713 let marker_len = marker.len();
714
715 // Track lines and their types (content, code block, fence, nested list)
716 #[derive(Clone)]
717 enum LineType {
718 Content(String),
719 CodeBlock(String, usize), // content and original indent
720 NestedListItem(String, usize), // full line content and original indent
721 SemanticLine(String), // Lines starting with NOTE:, WARNING:, etc that should stay separate
722 SnippetLine(String), // MkDocs Snippets delimiters (-8<-) that must stay on their own line
723 DivMarker(String), // Quarto/Pandoc div markers (::: opening or closing)
724 Empty,
725 }
726
727 let mut actual_indent: Option<usize> = None;
728 let mut list_item_lines: Vec<LineType> = vec![LineType::Content(first_content)];
729 i += 1;
730
731 // Collect continuation lines using ctx.lines for metadata
732 while i < lines.len() {
733 let line_info = &ctx.lines[i];
734
735 // Use pre-computed is_blank from ctx
736 if line_info.is_blank {
737 // Empty line - check if next line is indented (part of list item)
738 if i + 1 < lines.len() {
739 let next_info = &ctx.lines[i + 1];
740
741 // Check if next line is indented enough to be continuation
742 if !next_info.is_blank && next_info.indent >= marker_len {
743 // This blank line is between paragraphs/blocks in the list item
744 list_item_lines.push(LineType::Empty);
745 i += 1;
746 continue;
747 }
748 }
749 // No indented line after blank, end of list item
750 break;
751 }
752
753 // Use pre-computed indent from ctx
754 let indent = line_info.indent;
755
756 // Valid continuation must be indented at least marker_len
757 if indent >= marker_len {
758 let trimmed = line_info.content(ctx.content).trim();
759
760 // Use pre-computed in_code_block from ctx
761 if line_info.in_code_block {
762 list_item_lines.push(LineType::CodeBlock(
763 line_info.content(ctx.content)[indent..].to_string(),
764 indent,
765 ));
766 i += 1;
767 continue;
768 }
769
770 // Check if this is a SIBLING list item (breaks parent)
771 // Nested lists are indented >= marker_len and are PART of the parent item
772 // Siblings are at indent < marker_len (at or before parent marker)
773 if is_list_item(trimmed) && indent < marker_len {
774 // This is a sibling item at same or higher level - end parent item
775 break;
776 }
777
778 // Check if this is a NESTED list item marker
779 // Nested lists should be processed separately UNLESS they're part of a
780 // multi-paragraph list item (indicated by a blank line before them OR
781 // it's a continuation of an already-started nested list)
782 if is_list_item(trimmed) && indent >= marker_len {
783 // Check if there was a blank line before this (multi-paragraph context)
784 let has_blank_before = matches!(list_item_lines.last(), Some(LineType::Empty));
785
786 // Check if we've already seen nested list content (another nested item)
787 let has_nested_content = list_item_lines.iter().any(|line| {
788 matches!(line, LineType::Content(c) if is_list_item(c.trim()))
789 || matches!(line, LineType::NestedListItem(_, _))
790 });
791
792 if !has_blank_before && !has_nested_content {
793 // Single-paragraph context with no prior nested items: starts a new item
794 // End parent collection; nested list will be processed next
795 break;
796 }
797 // else: multi-paragraph context or continuation of nested list, keep collecting
798 // Mark this as a nested list item to preserve its structure
799 list_item_lines.push(LineType::NestedListItem(
800 line_info.content(ctx.content)[indent..].to_string(),
801 indent,
802 ));
803 i += 1;
804 continue;
805 }
806
807 // Normal continuation: marker_len to marker_len+3
808 if indent <= marker_len + 3 {
809 // Set actual_indent from first non-code continuation if not set
810 if actual_indent.is_none() {
811 actual_indent = Some(indent);
812 }
813
814 // Extract content (remove indentation and trailing whitespace)
815 // Preserve hard breaks (2 trailing spaces) while removing excessive whitespace
816 // See: https://github.com/rvben/rumdl/issues/76
817 let content = trim_preserving_hard_break(&line_info.content(ctx.content)[indent..]);
818
819 // Check if this is a div marker (::: opening or closing)
820 // These must be preserved on their own line, not merged into paragraphs
821 if line_info.is_div_marker {
822 list_item_lines.push(LineType::DivMarker(content));
823 }
824 // Check if this is a fence marker (opening or closing)
825 // These should be treated as code block lines, not paragraph content
826 else if is_fence_marker(&content) {
827 list_item_lines.push(LineType::CodeBlock(content, indent));
828 }
829 // Check if this is a semantic line (NOTE:, WARNING:, etc.)
830 else if is_semantic_line(&content) {
831 list_item_lines.push(LineType::SemanticLine(content));
832 }
833 // Check if this is a snippet block delimiter (-8<- or --8<--)
834 // These must be preserved on their own lines for MkDocs Snippets extension
835 else if is_snippet_block_delimiter(&content) {
836 list_item_lines.push(LineType::SnippetLine(content));
837 } else {
838 list_item_lines.push(LineType::Content(content));
839 }
840 i += 1;
841 } else {
842 // indent >= marker_len + 4: indented code block
843 list_item_lines.push(LineType::CodeBlock(
844 line_info.content(ctx.content)[indent..].to_string(),
845 indent,
846 ));
847 i += 1;
848 }
849 } else {
850 // Not indented enough, end of list item
851 break;
852 }
853 }
854
855 // Use detected indent or fallback to marker length
856 let indent_size = actual_indent.unwrap_or(marker_len);
857 let expected_indent = " ".repeat(indent_size);
858
859 // Split list_item_lines into blocks (paragraphs, code blocks, nested lists, semantic lines, and HTML blocks)
860 #[derive(Clone)]
861 enum Block {
862 Paragraph(Vec<String>),
863 Code {
864 lines: Vec<(String, usize)>, // (content, indent) pairs
865 has_preceding_blank: bool, // Whether there was a blank line before this block
866 },
867 NestedList(Vec<(String, usize)>), // (content, indent) pairs for nested list items
868 SemanticLine(String), // Semantic markers like NOTE:, WARNING: that stay on their own line
869 SnippetLine(String), // MkDocs Snippets delimiter that stays on its own line without extra spacing
870 DivMarker(String), // Quarto/Pandoc div marker (::: opening or closing) preserved on its own line
871 Html {
872 lines: Vec<String>, // HTML content preserved exactly as-is
873 has_preceding_blank: bool, // Whether there was a blank line before this block
874 },
875 }
876
// HTML tag detection helpers

/// Block-level HTML tags that should trigger HTML block detection.
///
/// Names are stored in lowercase; lookups compare ASCII case-insensitively.
const BLOCK_LEVEL_TAGS: &[&str] = &[
    "div",
    "details",
    "summary",
    "section",
    "article",
    "header",
    "footer",
    "nav",
    "aside",
    "main",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
    "ul",
    "ol",
    "li",
    "dl",
    "dt",
    "dd",
    "pre",
    "blockquote",
    "figure",
    "figcaption",
    "form",
    "fieldset",
    "legend",
    "hr",
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "style",
    "script",
    "noscript",
];

/// Returns the name of the block-level HTML construct that `line` opens, if any.
///
/// * A line starting with `<!--` yields the pseudo tag name `"!--"`.
/// * A line starting with `<name`, where `name` is one of [`BLOCK_LEVEL_TAGS`]
///   (compared ASCII case-insensitively), yields the lowercase tag name.
/// * Closing tags (`</...`), declarations (`<!...`), unknown or inline tags and
///   plain text yield `None`.
///
/// The tag name ends at the first whitespace, `>` or `/`, or at the end of the line.
/// Accepting end-of-line means an opening tag whose attributes continue on the
/// following lines (for example a line containing only `<div`) is still recognised.
/// Leading and trailing whitespace on `line` is ignored.
fn is_block_html_opening_tag(line: &str) -> Option<String> {
    let trimmed = line.trim();

    // HTML comments are tracked under a pseudo tag name.
    if trimmed.starts_with("<!--") {
        return Some("!--".to_string());
    }

    // Only `<name...` can open a block: not closing tags, not `<!DOCTYPE ...>`-style declarations.
    if !trimmed.starts_with('<') || trimmed.starts_with("</") || trimmed.starts_with("<!") {
        return None;
    }

    // Extract the tag name from `<tagname ...>`, `<tagname>`, `<tagname/>` or a bare `<tagname`.
    let after_bracket = &trimmed[1..];
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let candidate = &after_bracket[..name_end];

    // Only treat as block if it's a known block-level tag; return the canonical lowercase name.
    BLOCK_LEVEL_TAGS
        .iter()
        .find(|tag| tag.eq_ignore_ascii_case(candidate))
        .map(|tag| (*tag).to_string())
}
946
/// Returns `true` when `line` closes the HTML construct identified by `tag_name`.
///
/// * For the comment pseudo tag `"!--"`, the line closes it when it ends with `-->`.
/// * Otherwise the line must start with `</`, optionally followed by whitespace, and
///   then the complete tag name. The name is compared ASCII case-insensitively and
///   must be followed by whitespace, `>` or the end of the line.
///
/// Requiring the complete name prevents `</pre>` from closing `p`, or `</thead>`
/// from closing `th`. Ignoring case lets `</DIV>` close a tag that was recorded in
/// lowercase. Leading and trailing whitespace on `line` is ignored.
fn is_html_closing_tag(line: &str, tag_name: &str) -> bool {
    let trimmed = line.trim();

    // Special handling for HTML comments
    if tag_name == "!--" {
        return trimmed.ends_with("-->");
    }

    // Closing tags look like `</tagname>` or `</tagname ...>`; tolerate `</ tagname>`.
    let Some(rest) = trimmed.strip_prefix("</") else {
        return false;
    };
    let rest = rest.trim_start();

    // Compare the whole name, not just a prefix of it.
    let name_end = rest
        .find(|c: char| c.is_whitespace() || c == '>')
        .unwrap_or(rest.len());
    rest[..name_end].eq_ignore_ascii_case(tag_name)
}
960
/// Returns `true` when the tag on `line` needs no separate closing tag.
///
/// That is the case when the trimmed line ends with `/>`, or when the line starts
/// with a void element such as `<hr>` or `<br>`, which never takes a closing tag.
/// The element name is compared ASCII case-insensitively.
///
/// The HTML block tracking in this function pushes an opening tag onto its stack
/// unless this helper returns `true`. Without the void-element check a bare `<hr>`
/// would stay on the stack indefinitely, because no `</hr>` ever follows.
fn is_self_closing_tag(line: &str) -> bool {
    // HTML void elements.
    const VOID_ELEMENTS: &[&str] = &[
        "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source", "track", "wbr",
    ];

    let trimmed = line.trim();
    if trimmed.ends_with("/>") {
        return true;
    }

    let Some(after_bracket) = trimmed.strip_prefix('<') else {
        return false;
    };
    // The element name ends at whitespace, `>`, `/`, or the end of the line.
    let name_end = after_bracket
        .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
        .unwrap_or(after_bracket.len());
    let name = &after_bracket[..name_end];

    VOID_ELEMENTS.iter().any(|void| void.eq_ignore_ascii_case(name))
}
965
966 let mut blocks: Vec<Block> = Vec::new();
967 let mut current_paragraph: Vec<String> = Vec::new();
968 let mut current_code_block: Vec<(String, usize)> = Vec::new();
969 let mut current_nested_list: Vec<(String, usize)> = Vec::new();
970 let mut current_html_block: Vec<String> = Vec::new();
971 let mut html_tag_stack: Vec<String> = Vec::new();
972 let mut in_code = false;
973 let mut in_nested_list = false;
974 let mut in_html_block = false;
975 let mut had_preceding_blank = false; // Track if we just saw an empty line
976 let mut code_block_has_preceding_blank = false; // Track blank before current code block
977 let mut html_block_has_preceding_blank = false; // Track blank before current HTML block
978
979 for line in &list_item_lines {
980 match line {
981 LineType::Empty => {
982 if in_code {
983 current_code_block.push((String::new(), 0));
984 } else if in_nested_list {
985 current_nested_list.push((String::new(), 0));
986 } else if in_html_block {
987 // Allow blank lines inside HTML blocks
988 current_html_block.push(String::new());
989 } else if !current_paragraph.is_empty() {
990 blocks.push(Block::Paragraph(current_paragraph.clone()));
991 current_paragraph.clear();
992 }
993 // Mark that we saw a blank line
994 had_preceding_blank = true;
995 }
996 LineType::Content(content) => {
997 // Check if we're currently in an HTML block
998 if in_html_block {
999 current_html_block.push(content.clone());
1000
1001 // Check if this line closes any open HTML tags
1002 if let Some(last_tag) = html_tag_stack.last() {
1003 if is_html_closing_tag(content, last_tag) {
1004 html_tag_stack.pop();
1005
1006 // If stack is empty, HTML block is complete
1007 if html_tag_stack.is_empty() {
1008 blocks.push(Block::Html {
1009 lines: current_html_block.clone(),
1010 has_preceding_blank: html_block_has_preceding_blank,
1011 });
1012 current_html_block.clear();
1013 in_html_block = false;
1014 }
1015 } else if let Some(new_tag) = is_block_html_opening_tag(content) {
1016 // Nested opening tag within HTML block
1017 if !is_self_closing_tag(content) {
1018 html_tag_stack.push(new_tag);
1019 }
1020 }
1021 }
1022 had_preceding_blank = false;
1023 } else {
1024 // Not in HTML block - check if this line starts one
1025 if let Some(tag_name) = is_block_html_opening_tag(content) {
1026 // Flush current paragraph before starting HTML block
1027 if in_code {
1028 blocks.push(Block::Code {
1029 lines: current_code_block.clone(),
1030 has_preceding_blank: code_block_has_preceding_blank,
1031 });
1032 current_code_block.clear();
1033 in_code = false;
1034 } else if in_nested_list {
1035 blocks.push(Block::NestedList(current_nested_list.clone()));
1036 current_nested_list.clear();
1037 in_nested_list = false;
1038 } else if !current_paragraph.is_empty() {
1039 blocks.push(Block::Paragraph(current_paragraph.clone()));
1040 current_paragraph.clear();
1041 }
1042
1043 // Start new HTML block
1044 in_html_block = true;
1045 html_block_has_preceding_blank = had_preceding_blank;
1046 current_html_block.push(content.clone());
1047
1048 // Check if it's self-closing or needs a closing tag
1049 if is_self_closing_tag(content) {
1050 // Self-closing tag - complete the HTML block immediately
1051 blocks.push(Block::Html {
1052 lines: current_html_block.clone(),
1053 has_preceding_blank: html_block_has_preceding_blank,
1054 });
1055 current_html_block.clear();
1056 in_html_block = false;
1057 } else {
1058 // Regular opening tag - push to stack
1059 html_tag_stack.push(tag_name);
1060 }
1061 } else {
1062 // Regular content line - add to paragraph
1063 if in_code {
1064 // Switching from code to content
1065 blocks.push(Block::Code {
1066 lines: current_code_block.clone(),
1067 has_preceding_blank: code_block_has_preceding_blank,
1068 });
1069 current_code_block.clear();
1070 in_code = false;
1071 } else if in_nested_list {
1072 // Switching from nested list to content
1073 blocks.push(Block::NestedList(current_nested_list.clone()));
1074 current_nested_list.clear();
1075 in_nested_list = false;
1076 }
1077 current_paragraph.push(content.clone());
1078 }
1079 had_preceding_blank = false; // Reset after content
1080 }
1081 }
1082 LineType::CodeBlock(content, indent) => {
1083 if in_nested_list {
1084 // Switching from nested list to code
1085 blocks.push(Block::NestedList(current_nested_list.clone()));
1086 current_nested_list.clear();
1087 in_nested_list = false;
1088 } else if in_html_block {
1089 // Switching from HTML block to code (shouldn't happen normally, but handle it)
1090 blocks.push(Block::Html {
1091 lines: current_html_block.clone(),
1092 has_preceding_blank: html_block_has_preceding_blank,
1093 });
1094 current_html_block.clear();
1095 html_tag_stack.clear();
1096 in_html_block = false;
1097 }
1098 if !in_code {
1099 // Switching from content to code
1100 if !current_paragraph.is_empty() {
1101 blocks.push(Block::Paragraph(current_paragraph.clone()));
1102 current_paragraph.clear();
1103 }
1104 in_code = true;
1105 // Record whether there was a blank line before this code block
1106 code_block_has_preceding_blank = had_preceding_blank;
1107 }
1108 current_code_block.push((content.clone(), *indent));
1109 had_preceding_blank = false; // Reset after code
1110 }
1111 LineType::NestedListItem(content, indent) => {
1112 if in_code {
1113 // Switching from code to nested list
1114 blocks.push(Block::Code {
1115 lines: current_code_block.clone(),
1116 has_preceding_blank: code_block_has_preceding_blank,
1117 });
1118 current_code_block.clear();
1119 in_code = false;
1120 } else if in_html_block {
1121 // Switching from HTML block to nested list (shouldn't happen normally, but handle it)
1122 blocks.push(Block::Html {
1123 lines: current_html_block.clone(),
1124 has_preceding_blank: html_block_has_preceding_blank,
1125 });
1126 current_html_block.clear();
1127 html_tag_stack.clear();
1128 in_html_block = false;
1129 }
1130 if !in_nested_list {
1131 // Switching from content to nested list
1132 if !current_paragraph.is_empty() {
1133 blocks.push(Block::Paragraph(current_paragraph.clone()));
1134 current_paragraph.clear();
1135 }
1136 in_nested_list = true;
1137 }
1138 current_nested_list.push((content.clone(), *indent));
1139 had_preceding_blank = false; // Reset after nested list
1140 }
1141 LineType::SemanticLine(content) => {
1142 // Semantic lines are standalone - flush any current block and add as separate block
1143 if in_code {
1144 blocks.push(Block::Code {
1145 lines: current_code_block.clone(),
1146 has_preceding_blank: code_block_has_preceding_blank,
1147 });
1148 current_code_block.clear();
1149 in_code = false;
1150 } else if in_nested_list {
1151 blocks.push(Block::NestedList(current_nested_list.clone()));
1152 current_nested_list.clear();
1153 in_nested_list = false;
1154 } else if in_html_block {
1155 blocks.push(Block::Html {
1156 lines: current_html_block.clone(),
1157 has_preceding_blank: html_block_has_preceding_blank,
1158 });
1159 current_html_block.clear();
1160 html_tag_stack.clear();
1161 in_html_block = false;
1162 } else if !current_paragraph.is_empty() {
1163 blocks.push(Block::Paragraph(current_paragraph.clone()));
1164 current_paragraph.clear();
1165 }
1166 // Add semantic line as its own block
1167 blocks.push(Block::SemanticLine(content.clone()));
1168 had_preceding_blank = false; // Reset after semantic line
1169 }
1170 LineType::SnippetLine(content) => {
1171 // Snippet delimiters (-8<-) are standalone - flush any current block and add as separate block
1172 // Unlike semantic lines, snippet lines don't add extra blank lines around them
1173 if in_code {
1174 blocks.push(Block::Code {
1175 lines: current_code_block.clone(),
1176 has_preceding_blank: code_block_has_preceding_blank,
1177 });
1178 current_code_block.clear();
1179 in_code = false;
1180 } else if in_nested_list {
1181 blocks.push(Block::NestedList(current_nested_list.clone()));
1182 current_nested_list.clear();
1183 in_nested_list = false;
1184 } else if in_html_block {
1185 blocks.push(Block::Html {
1186 lines: current_html_block.clone(),
1187 has_preceding_blank: html_block_has_preceding_blank,
1188 });
1189 current_html_block.clear();
1190 html_tag_stack.clear();
1191 in_html_block = false;
1192 } else if !current_paragraph.is_empty() {
1193 blocks.push(Block::Paragraph(current_paragraph.clone()));
1194 current_paragraph.clear();
1195 }
1196 // Add snippet line as its own block
1197 blocks.push(Block::SnippetLine(content.clone()));
1198 had_preceding_blank = false;
1199 }
1200 LineType::DivMarker(content) => {
1201 // Div markers (::: opening or closing) are standalone structural delimiters
1202 // Flush any current block and add as separate block
1203 if in_code {
1204 blocks.push(Block::Code {
1205 lines: current_code_block.clone(),
1206 has_preceding_blank: code_block_has_preceding_blank,
1207 });
1208 current_code_block.clear();
1209 in_code = false;
1210 } else if in_nested_list {
1211 blocks.push(Block::NestedList(current_nested_list.clone()));
1212 current_nested_list.clear();
1213 in_nested_list = false;
1214 } else if in_html_block {
1215 blocks.push(Block::Html {
1216 lines: current_html_block.clone(),
1217 has_preceding_blank: html_block_has_preceding_blank,
1218 });
1219 current_html_block.clear();
1220 html_tag_stack.clear();
1221 in_html_block = false;
1222 } else if !current_paragraph.is_empty() {
1223 blocks.push(Block::Paragraph(current_paragraph.clone()));
1224 current_paragraph.clear();
1225 }
1226 blocks.push(Block::DivMarker(content.clone()));
1227 had_preceding_blank = false;
1228 }
1229 }
1230 }
1231
1232 // Push remaining block
1233 if in_code && !current_code_block.is_empty() {
1234 blocks.push(Block::Code {
1235 lines: current_code_block,
1236 has_preceding_blank: code_block_has_preceding_blank,
1237 });
1238 } else if in_nested_list && !current_nested_list.is_empty() {
1239 blocks.push(Block::NestedList(current_nested_list));
1240 } else if in_html_block && !current_html_block.is_empty() {
1241 // If we still have an unclosed HTML block, push it anyway
1242 // (malformed HTML - missing closing tag)
1243 blocks.push(Block::Html {
1244 lines: current_html_block,
1245 has_preceding_blank: html_block_has_preceding_blank,
1246 });
1247 } else if !current_paragraph.is_empty() {
1248 blocks.push(Block::Paragraph(current_paragraph));
1249 }
1250
1251 // Check if reflowing is needed (only for content paragraphs, not code blocks or nested lists)
1252 let content_lines: Vec<String> = list_item_lines
1253 .iter()
1254 .filter_map(|line| {
1255 if let LineType::Content(s) = line {
1256 Some(s.clone())
1257 } else {
1258 None
1259 }
1260 })
1261 .collect();
1262
1263 // Check if we need to reflow this list item
1264 // We check the combined content to see if it exceeds length limits
1265 let combined_content = content_lines.join(" ").trim().to_string();
1266 let full_line = format!("{marker}{combined_content}");
1267
1268 // Helper to check if we should reflow in normalize mode
1269 let should_normalize = || {
1270 // Don't normalize if the list item only contains nested lists, code blocks, or semantic lines
1271 // DO normalize if it has plain text content that spans multiple lines
1272 let has_nested_lists = blocks.iter().any(|b| matches!(b, Block::NestedList(_)));
1273 let has_code_blocks = blocks.iter().any(|b| matches!(b, Block::Code { .. }));
1274 let has_semantic_lines = blocks.iter().any(|b| matches!(b, Block::SemanticLine(_)));
1275 let has_snippet_lines = blocks.iter().any(|b| matches!(b, Block::SnippetLine(_)));
1276 let has_div_markers = blocks.iter().any(|b| matches!(b, Block::DivMarker(_)));
1277 let has_paragraphs = blocks.iter().any(|b| matches!(b, Block::Paragraph(_)));
1278
1279 // If we have structural blocks but no paragraphs, don't normalize
1280 if (has_nested_lists
1281 || has_code_blocks
1282 || has_semantic_lines
1283 || has_snippet_lines
1284 || has_div_markers)
1285 && !has_paragraphs
1286 {
1287 return false;
1288 }
1289
1290 // If we have paragraphs, check if they span multiple lines or there are multiple blocks
1291 if has_paragraphs {
1292 let paragraph_count = blocks.iter().filter(|b| matches!(b, Block::Paragraph(_))).count();
1293 if paragraph_count > 1 {
1294 // Multiple paragraph blocks should be normalized
1295 return true;
1296 }
1297
1298 // Single paragraph block: normalize if it has multiple content lines
1299 if content_lines.len() > 1 {
1300 return true;
1301 }
1302 }
1303
1304 false
1305 };
1306
1307 let needs_reflow = match config.reflow_mode {
1308 ReflowMode::Normalize => {
1309 // Only reflow if:
1310 // 1. The combined line would exceed the limit, OR
1311 // 2. The list item should be normalized (has multi-line plain text)
1312 let combined_length = self.calculate_effective_length(&full_line);
1313 if combined_length > config.line_length.get() {
1314 true
1315 } else {
1316 should_normalize()
1317 }
1318 }
1319 ReflowMode::SentencePerLine => {
1320 // Check if list item has multiple sentences
1321 let sentences = split_into_sentences(&combined_content);
1322 sentences.len() > 1
1323 }
1324 ReflowMode::SemanticLineBreaks => {
1325 let sentences = split_into_sentences(&combined_content);
1326 sentences.len() > 1
1327 || (list_start..i).any(|line_idx| {
1328 self.calculate_effective_length(lines[line_idx]) > config.line_length.get()
1329 })
1330 }
1331 ReflowMode::Default => {
1332 // In default mode, only reflow if any individual line exceeds limit
1333 (list_start..i)
1334 .any(|line_idx| self.calculate_effective_length(lines[line_idx]) > config.line_length.get())
1335 }
1336 };
1337
1338 if needs_reflow {
1339 let start_range = line_index.whole_line_range(list_start + 1);
1340 let end_line = i - 1;
1341 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1342 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1343 } else {
1344 line_index.whole_line_range(end_line + 1)
1345 };
1346 let byte_range = start_range.start..end_range.end;
1347
1348 // Reflow each block (paragraphs only, preserve code blocks)
1349 // When line_length = 0 (no limit), use a very large value for reflow
1350 let reflow_line_length = if config.line_length.is_unlimited() {
1351 usize::MAX
1352 } else {
1353 config.line_length.get().saturating_sub(indent_size).max(1)
1354 };
1355 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1356 line_length: reflow_line_length,
1357 break_on_sentences: true,
1358 preserve_breaks: false,
1359 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1360 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1361 abbreviations: config.abbreviations_for_reflow(),
1362 length_mode: self.reflow_length_mode(),
1363 };
1364
1365 let mut result: Vec<String> = Vec::new();
1366 let mut is_first_block = true;
1367
1368 for (block_idx, block) in blocks.iter().enumerate() {
1369 match block {
1370 Block::Paragraph(para_lines) => {
1371 // Split the paragraph into segments at hard break boundaries
1372 // Each segment can be reflowed independently
1373 let segments = split_into_segments(para_lines);
1374
1375 for (segment_idx, segment) in segments.iter().enumerate() {
1376 // Check if this segment ends with a hard break and what type
1377 let hard_break_type = segment.last().and_then(|line| {
1378 let line = line.strip_suffix('\r').unwrap_or(line);
1379 if line.ends_with('\\') {
1380 Some("\\")
1381 } else if line.ends_with(" ") {
1382 Some(" ")
1383 } else {
1384 None
1385 }
1386 });
1387
1388 // Join and reflow the segment (removing the hard break marker for processing)
1389 let segment_for_reflow: Vec<String> = segment
1390 .iter()
1391 .map(|line| {
1392 // Strip hard break marker (2 spaces or backslash) for reflow processing
1393 if line.ends_with('\\') {
1394 line[..line.len() - 1].trim_end().to_string()
1395 } else if line.ends_with(" ") {
1396 line[..line.len() - 2].trim_end().to_string()
1397 } else {
1398 line.clone()
1399 }
1400 })
1401 .collect();
1402
1403 let segment_text = segment_for_reflow.join(" ").trim().to_string();
1404 if !segment_text.is_empty() {
1405 let reflowed =
1406 crate::utils::text_reflow::reflow_line(&segment_text, &reflow_options);
1407
1408 if is_first_block && segment_idx == 0 {
1409 // First segment of first block starts with marker
1410 result.push(format!("{marker}{}", reflowed[0]));
1411 for line in reflowed.iter().skip(1) {
1412 result.push(format!("{expected_indent}{line}"));
1413 }
1414 is_first_block = false;
1415 } else {
1416 // Subsequent segments
1417 for line in reflowed {
1418 result.push(format!("{expected_indent}{line}"));
1419 }
1420 }
1421
1422 // If this segment had a hard break, add it back to the last line
1423 // Preserve the original hard break format (backslash or two spaces)
1424 if let Some(break_marker) = hard_break_type
1425 && let Some(last_line) = result.last_mut()
1426 {
1427 last_line.push_str(break_marker);
1428 }
1429 }
1430 }
1431
1432 // Add blank line after paragraph block if there's a next block
1433 // BUT: check if next block is a code block that doesn't want a preceding blank
1434 // Also don't add blank lines before snippet lines (they should stay tight)
1435 if block_idx < blocks.len() - 1 {
1436 let next_block = &blocks[block_idx + 1];
1437 let should_add_blank = match next_block {
1438 Block::Code {
1439 has_preceding_blank, ..
1440 } => *has_preceding_blank,
1441 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1442 _ => true, // For all other blocks, add blank line
1443 };
1444 if should_add_blank {
1445 result.push(String::new());
1446 }
1447 }
1448 }
1449 Block::Code {
1450 lines: code_lines,
1451 has_preceding_blank: _,
1452 } => {
1453 // Preserve code blocks as-is with original indentation
1454 // NOTE: Blank line before code block is handled by the previous block
1455 // (see paragraph block's logic above)
1456
1457 for (idx, (content, orig_indent)) in code_lines.iter().enumerate() {
1458 if is_first_block && idx == 0 {
1459 // First line of first block gets marker
1460 result.push(format!(
1461 "{marker}{}",
1462 " ".repeat(orig_indent - marker_len) + content
1463 ));
1464 is_first_block = false;
1465 } else if content.is_empty() {
1466 result.push(String::new());
1467 } else {
1468 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1469 }
1470 }
1471 }
1472 Block::NestedList(nested_items) => {
1473 // Preserve nested list items as-is with original indentation
1474 if !is_first_block {
1475 result.push(String::new());
1476 }
1477
1478 for (idx, (content, orig_indent)) in nested_items.iter().enumerate() {
1479 if is_first_block && idx == 0 {
1480 // First line of first block gets marker
1481 result.push(format!(
1482 "{marker}{}",
1483 " ".repeat(orig_indent - marker_len) + content
1484 ));
1485 is_first_block = false;
1486 } else if content.is_empty() {
1487 result.push(String::new());
1488 } else {
1489 result.push(format!("{}{}", " ".repeat(*orig_indent), content));
1490 }
1491 }
1492
1493 // Add blank line after nested list if there's a next block
1494 // Check if next block is a code block that doesn't want a preceding blank
1495 if block_idx < blocks.len() - 1 {
1496 let next_block = &blocks[block_idx + 1];
1497 let should_add_blank = match next_block {
1498 Block::Code {
1499 has_preceding_blank, ..
1500 } => *has_preceding_blank,
1501 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1502 _ => true, // For all other blocks, add blank line
1503 };
1504 if should_add_blank {
1505 result.push(String::new());
1506 }
1507 }
1508 }
1509 Block::SemanticLine(content) => {
1510 // Preserve semantic lines (NOTE:, WARNING:, etc.) as-is on their own line
1511 // Add blank line before if not first block
1512 if !is_first_block {
1513 result.push(String::new());
1514 }
1515
1516 if is_first_block {
1517 // First block starts with marker
1518 result.push(format!("{marker}{content}"));
1519 is_first_block = false;
1520 } else {
1521 // Subsequent blocks use expected indent
1522 result.push(format!("{expected_indent}{content}"));
1523 }
1524
1525 // Add blank line after semantic line if there's a next block
1526 // Check if next block is a code block that doesn't want a preceding blank
1527 if block_idx < blocks.len() - 1 {
1528 let next_block = &blocks[block_idx + 1];
1529 let should_add_blank = match next_block {
1530 Block::Code {
1531 has_preceding_blank, ..
1532 } => *has_preceding_blank,
1533 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1534 _ => true, // For all other blocks, add blank line
1535 };
1536 if should_add_blank {
1537 result.push(String::new());
1538 }
1539 }
1540 }
1541 Block::SnippetLine(content) => {
1542 // Preserve snippet delimiters (-8<-) as-is on their own line
1543 // Unlike semantic lines, snippet lines don't add extra blank lines
1544 if is_first_block {
1545 // First block starts with marker
1546 result.push(format!("{marker}{content}"));
1547 is_first_block = false;
1548 } else {
1549 // Subsequent blocks use expected indent
1550 result.push(format!("{expected_indent}{content}"));
1551 }
1552 // No blank lines added before or after snippet delimiters
1553 }
1554 Block::DivMarker(content) => {
1555 // Preserve div markers (::: opening or closing) as-is on their own line
1556 if is_first_block {
1557 result.push(format!("{marker}{content}"));
1558 is_first_block = false;
1559 } else {
1560 result.push(format!("{expected_indent}{content}"));
1561 }
1562 }
1563 Block::Html {
1564 lines: html_lines,
1565 has_preceding_blank: _,
1566 } => {
1567 // Preserve HTML blocks exactly as-is with original indentation
1568 // NOTE: Blank line before HTML block is handled by the previous block
1569
1570 for (idx, line) in html_lines.iter().enumerate() {
1571 if is_first_block && idx == 0 {
1572 // First line of first block gets marker
1573 result.push(format!("{marker}{line}"));
1574 is_first_block = false;
1575 } else if line.is_empty() {
1576 // Preserve blank lines inside HTML blocks
1577 result.push(String::new());
1578 } else {
1579 // Preserve lines with their original content (already includes indentation)
1580 result.push(format!("{expected_indent}{line}"));
1581 }
1582 }
1583
1584 // Add blank line after HTML block if there's a next block
1585 if block_idx < blocks.len() - 1 {
1586 let next_block = &blocks[block_idx + 1];
1587 let should_add_blank = match next_block {
1588 Block::Code {
1589 has_preceding_blank, ..
1590 } => *has_preceding_blank,
1591 Block::Html {
1592 has_preceding_blank, ..
1593 } => *has_preceding_blank,
1594 Block::SnippetLine(_) | Block::DivMarker(_) => false,
1595 _ => true, // For all other blocks, add blank line
1596 };
1597 if should_add_blank {
1598 result.push(String::new());
1599 }
1600 }
1601 }
1602 }
1603 }
1604
1605 let reflowed_text = result.join("\n");
1606
1607 // Preserve trailing newline
1608 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1609 format!("{reflowed_text}\n")
1610 } else {
1611 reflowed_text
1612 };
1613
1614 // Get the original text to compare
1615 let original_text = &ctx.content[byte_range.clone()];
1616
1617 // Only generate a warning if the replacement is different from the original
1618 if original_text != replacement {
1619 // Generate an appropriate message based on why reflow is needed
1620 let message = match config.reflow_mode {
1621 ReflowMode::SentencePerLine => {
1622 let num_sentences = split_into_sentences(&combined_content).len();
1623 let num_lines = content_lines.len();
1624 if num_lines == 1 {
1625 // Single line with multiple sentences
1626 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1627 } else {
1628 // Multiple lines - could be split sentences or mixed
1629 format!(
1630 "Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)"
1631 )
1632 }
1633 }
1634 ReflowMode::SemanticLineBreaks => {
1635 let num_sentences = split_into_sentences(&combined_content).len();
1636 format!("Paragraph should use semantic line breaks ({num_sentences} sentences)")
1637 }
1638 ReflowMode::Normalize => {
1639 let combined_length = self.calculate_effective_length(&full_line);
1640 if combined_length > config.line_length.get() {
1641 format!(
1642 "Line length {} exceeds {} characters",
1643 combined_length,
1644 config.line_length.get()
1645 )
1646 } else {
1647 "Multi-line content can be normalized".to_string()
1648 }
1649 }
1650 ReflowMode::Default => {
1651 let combined_length = self.calculate_effective_length(&full_line);
1652 format!(
1653 "Line length {} exceeds {} characters",
1654 combined_length,
1655 config.line_length.get()
1656 )
1657 }
1658 };
1659
1660 warnings.push(LintWarning {
1661 rule_name: Some(self.name().to_string()),
1662 message,
1663 line: list_start + 1,
1664 column: 1,
1665 end_line: end_line + 1,
1666 end_column: lines[end_line].len() + 1,
1667 severity: Severity::Warning,
1668 fix: Some(crate::rule::Fix {
1669 range: byte_range,
1670 replacement,
1671 }),
1672 });
1673 }
1674 }
1675 continue;
1676 }
1677
1678 // Found start of a paragraph - collect all lines in it
1679 let paragraph_start = i;
1680 let mut paragraph_lines = vec![lines[i]];
1681 i += 1;
1682
1683 while i < lines.len() {
1684 let next_line = lines[i];
1685 let next_line_num = i + 1;
1686 let next_trimmed = next_line.trim();
1687
1688 // Stop at paragraph boundaries
1689 if next_trimmed.is_empty()
1690 || ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block)
1691 || ctx.line_info(next_line_num).is_some_and(|info| info.in_front_matter)
1692 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_block)
1693 || ctx.line_info(next_line_num).is_some_and(|info| info.in_html_comment)
1694 || ctx.line_info(next_line_num).is_some_and(|info| info.in_esm_block)
1695 || ctx.line_info(next_line_num).is_some_and(|info| info.in_jsx_expression)
1696 || ctx.line_info(next_line_num).is_some_and(|info| info.in_mdx_comment)
1697 || ctx
1698 .line_info(next_line_num)
1699 .is_some_and(|info| info.in_mkdocs_container())
1700 || (next_line_num > 0
1701 && next_line_num <= ctx.lines.len()
1702 && ctx.lines[next_line_num - 1].blockquote.is_some())
1703 || next_trimmed.starts_with('#')
1704 || TableUtils::is_potential_table_row(next_line)
1705 || is_list_item(next_trimmed)
1706 || is_horizontal_rule(next_trimmed)
1707 || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1708 || is_template_directive_only(next_line)
1709 || is_standalone_attr_list(next_line)
1710 || is_snippet_block_delimiter(next_line)
1711 || ctx.line_info(next_line_num).is_some_and(|info| info.is_div_marker)
1712 {
1713 break;
1714 }
1715
1716 // Check if the previous line ends with a hard break (2+ spaces or backslash)
1717 if i > 0 && has_hard_break(lines[i - 1]) {
1718 // Don't include lines after hard breaks in the same paragraph
1719 break;
1720 }
1721
1722 paragraph_lines.push(next_line);
1723 i += 1;
1724 }
1725
1726 // Combine paragraph lines into a single string for processing
1727 // This must be done BEFORE the needs_reflow check for sentence-per-line mode
1728 let paragraph_text = paragraph_lines.join(" ");
1729
1730 // Skip reflowing if this paragraph contains definition list items
1731 // Definition lists are multi-line structures that should not be joined
1732 let contains_definition_list = paragraph_lines
1733 .iter()
1734 .any(|line| crate::utils::is_definition_list_item(line));
1735
1736 if contains_definition_list {
1737 // Don't reflow definition lists - skip this paragraph
1738 i = paragraph_start + paragraph_lines.len();
1739 continue;
1740 }
1741
1742 // Skip reflowing if this paragraph contains MkDocs Snippets markers
1743 // Snippets blocks (-8<- ... -8<-) should be preserved exactly
1744 let contains_snippets = paragraph_lines.iter().any(|line| is_snippet_block_delimiter(line));
1745
1746 if contains_snippets {
1747 // Don't reflow Snippets blocks - skip this paragraph
1748 i = paragraph_start + paragraph_lines.len();
1749 continue;
1750 }
1751
1752 // Check if this paragraph needs reflowing
1753 let needs_reflow = match config.reflow_mode {
1754 ReflowMode::Normalize => {
1755 // In normalize mode, reflow multi-line paragraphs
1756 paragraph_lines.len() > 1
1757 }
1758 ReflowMode::SentencePerLine => {
1759 // In sentence-per-line mode, check if the JOINED paragraph has multiple sentences
1760 // Note: we check the joined text because sentences can span multiple lines
1761 let sentences = split_into_sentences(¶graph_text);
1762
1763 // Always reflow if multiple sentences on one line
1764 if sentences.len() > 1 {
1765 true
1766 } else if paragraph_lines.len() > 1 {
1767 // For single-sentence paragraphs spanning multiple lines:
1768 // Reflow if they COULD fit on one line (respecting line-length constraint)
1769 if config.line_length.is_unlimited() {
1770 // No line-length constraint - always join single sentences
1771 true
1772 } else {
1773 // Only join if it fits within line-length
1774 let effective_length = self.calculate_effective_length(¶graph_text);
1775 effective_length <= config.line_length.get()
1776 }
1777 } else {
1778 false
1779 }
1780 }
1781 ReflowMode::SemanticLineBreaks => {
1782 let sentences = split_into_sentences(¶graph_text);
1783 // Reflow if multiple sentences, multiple lines, or any line exceeds limit
1784 sentences.len() > 1
1785 || paragraph_lines.len() > 1
1786 || paragraph_lines
1787 .iter()
1788 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1789 }
1790 ReflowMode::Default => {
1791 // In default mode, only reflow if lines exceed limit
1792 paragraph_lines
1793 .iter()
1794 .any(|line| self.calculate_effective_length(line) > config.line_length.get())
1795 }
1796 };
1797
1798 if needs_reflow {
1799 // Calculate byte range for this paragraph
1800 // Use whole_line_range for each line and combine
1801 let start_range = line_index.whole_line_range(paragraph_start + 1);
1802 let end_line = paragraph_start + paragraph_lines.len() - 1;
1803
1804 // For the last line, we want to preserve any trailing newline
1805 let end_range = if end_line == lines.len() - 1 && !ctx.content.ends_with('\n') {
1806 // Last line without trailing newline - use line_text_range
1807 line_index.line_text_range(end_line + 1, 1, lines[end_line].len() + 1)
1808 } else {
1809 // Not the last line or has trailing newline - use whole_line_range
1810 line_index.whole_line_range(end_line + 1)
1811 };
1812
1813 let byte_range = start_range.start..end_range.end;
1814
1815 // Check if the paragraph ends with a hard break and what type
1816 let hard_break_type = paragraph_lines.last().and_then(|line| {
1817 let line = line.strip_suffix('\r').unwrap_or(line);
1818 if line.ends_with('\\') {
1819 Some("\\")
1820 } else if line.ends_with(" ") {
1821 Some(" ")
1822 } else {
1823 None
1824 }
1825 });
1826
1827 // Reflow the paragraph
1828 // When line_length = 0 (no limit), use a very large value for reflow
1829 let reflow_line_length = if config.line_length.is_unlimited() {
1830 usize::MAX
1831 } else {
1832 config.line_length.get()
1833 };
1834 let reflow_options = crate::utils::text_reflow::ReflowOptions {
1835 line_length: reflow_line_length,
1836 break_on_sentences: true,
1837 preserve_breaks: false,
1838 sentence_per_line: config.reflow_mode == ReflowMode::SentencePerLine,
1839 semantic_line_breaks: config.reflow_mode == ReflowMode::SemanticLineBreaks,
1840 abbreviations: config.abbreviations_for_reflow(),
1841 length_mode: self.reflow_length_mode(),
1842 };
1843 let mut reflowed = crate::utils::text_reflow::reflow_line(¶graph_text, &reflow_options);
1844
1845 // If the original paragraph ended with a hard break, preserve it
1846 // Preserve the original hard break format (backslash or two spaces)
1847 if let Some(break_marker) = hard_break_type
1848 && !reflowed.is_empty()
1849 {
1850 let last_idx = reflowed.len() - 1;
1851 if !has_hard_break(&reflowed[last_idx]) {
1852 reflowed[last_idx].push_str(break_marker);
1853 }
1854 }
1855
1856 let reflowed_text = reflowed.join("\n");
1857
1858 // Preserve trailing newline if the original paragraph had one
1859 let replacement = if end_line < lines.len() - 1 || ctx.content.ends_with('\n') {
1860 format!("{reflowed_text}\n")
1861 } else {
1862 reflowed_text
1863 };
1864
1865 // Get the original text to compare
1866 let original_text = &ctx.content[byte_range.clone()];
1867
1868 // Only generate a warning if the replacement is different from the original
1869 if original_text != replacement {
1870 // Create warning with actual fix
1871 // In default mode, report the specific line that violates
1872 // In normalize mode, report the whole paragraph
1873 // In sentence-per-line mode, report the entire paragraph
1874 let (warning_line, warning_end_line) = match config.reflow_mode {
1875 ReflowMode::Normalize => (paragraph_start + 1, end_line + 1),
1876 ReflowMode::SentencePerLine | ReflowMode::SemanticLineBreaks => {
1877 // Highlight the entire paragraph that needs reformatting
1878 (paragraph_start + 1, paragraph_start + paragraph_lines.len())
1879 }
1880 ReflowMode::Default => {
1881 // Find the first line that exceeds the limit
1882 let mut violating_line = paragraph_start;
1883 for (idx, line) in paragraph_lines.iter().enumerate() {
1884 if self.calculate_effective_length(line) > config.line_length.get() {
1885 violating_line = paragraph_start + idx;
1886 break;
1887 }
1888 }
1889 (violating_line + 1, violating_line + 1)
1890 }
1891 };
1892
1893 warnings.push(LintWarning {
1894 rule_name: Some(self.name().to_string()),
1895 message: match config.reflow_mode {
1896 ReflowMode::Normalize => format!(
1897 "Paragraph could be normalized to use line length of {} characters",
1898 config.line_length.get()
1899 ),
1900 ReflowMode::SentencePerLine => {
1901 let num_sentences = split_into_sentences(¶graph_text).len();
1902 if paragraph_lines.len() == 1 {
1903 // Single line with multiple sentences
1904 format!("Line contains {num_sentences} sentences (one sentence per line required)")
1905 } else {
1906 let num_lines = paragraph_lines.len();
1907 // Multiple lines - could be split sentences or mixed
1908 format!("Paragraph should have one sentence per line (found {num_sentences} sentences across {num_lines} lines)")
1909 }
1910 },
1911 ReflowMode::SemanticLineBreaks => {
1912 let num_sentences = split_into_sentences(¶graph_text).len();
1913 format!(
1914 "Paragraph should use semantic line breaks ({num_sentences} sentences)"
1915 )
1916 },
1917 ReflowMode::Default => format!("Line length exceeds {} characters", config.line_length.get()),
1918 },
1919 line: warning_line,
1920 column: 1,
1921 end_line: warning_end_line,
1922 end_column: lines[warning_end_line.saturating_sub(1)].len() + 1,
1923 severity: Severity::Warning,
1924 fix: Some(crate::rule::Fix {
1925 range: byte_range,
1926 replacement,
1927 }),
1928 });
1929 }
1930 }
1931 }
1932
1933 warnings
1934 }
1935
1936 /// Calculate string length based on the configured length mode
1937 fn calculate_string_length(&self, s: &str) -> usize {
1938 match self.config.length_mode {
1939 LengthMode::Chars => s.chars().count(),
1940 LengthMode::Visual => s.width(),
1941 LengthMode::Bytes => s.len(),
1942 }
1943 }
1944
1945 /// Calculate effective line length
1946 ///
1947 /// Returns the actual display length of the line using the configured length mode.
1948 fn calculate_effective_length(&self, line: &str) -> usize {
1949 self.calculate_string_length(line)
1950 }
1951
1952 /// Calculate line length with inline link/image URLs removed.
1953 ///
1954 /// For each inline link `[text](url)` or image `` on the line,
1955 /// computes the "savings" from removing the URL portion (keeping only `[text]`
1956 /// or `![alt]`). Returns `effective_length - total_savings`.
1957 ///
1958 /// Handles nested constructs (e.g., `[](url)`) by only counting the
1959 /// outermost construct to avoid double-counting.
1960 fn calculate_text_only_length(
1961 &self,
1962 effective_length: usize,
1963 line_number: usize,
1964 ctx: &crate::lint_context::LintContext,
1965 ) -> usize {
1966 let line_range = ctx.line_index.line_content_range(line_number);
1967 let line_byte_end = line_range.end;
1968
1969 // Collect inline links/images on this line: (byte_offset, byte_end, text_only_display_len)
1970 let mut constructs: Vec<(usize, usize, usize)> = Vec::new();
1971
1972 for link in &ctx.links {
1973 if link.line != line_number || link.is_reference {
1974 continue;
1975 }
1976 if !matches!(link.link_type, LinkType::Inline) {
1977 continue;
1978 }
1979 // Skip cross-line links
1980 if link.byte_end > line_byte_end {
1981 continue;
1982 }
1983 // `[text]` in configured length mode
1984 let text_only_len = 2 + self.calculate_string_length(&link.text);
1985 constructs.push((link.byte_offset, link.byte_end, text_only_len));
1986 }
1987
1988 for image in &ctx.images {
1989 if image.line != line_number || image.is_reference {
1990 continue;
1991 }
1992 if !matches!(image.link_type, LinkType::Inline) {
1993 continue;
1994 }
1995 // Skip cross-line images
1996 if image.byte_end > line_byte_end {
1997 continue;
1998 }
1999 // `![alt]` in configured length mode
2000 let text_only_len = 3 + self.calculate_string_length(&image.alt_text);
2001 constructs.push((image.byte_offset, image.byte_end, text_only_len));
2002 }
2003
2004 if constructs.is_empty() {
2005 return effective_length;
2006 }
2007
2008 // Sort by byte offset to handle overlapping/nested constructs
2009 constructs.sort_by_key(|&(start, _, _)| start);
2010
2011 let mut total_savings: usize = 0;
2012 let mut last_end: usize = 0;
2013
2014 for (start, end, text_only_len) in &constructs {
2015 // Skip constructs nested inside a previously counted one
2016 if *start < last_end {
2017 continue;
2018 }
2019 // Full construct length in configured length mode
2020 let full_source = &ctx.content[*start..*end];
2021 let full_len = self.calculate_string_length(full_source);
2022 total_savings += full_len.saturating_sub(*text_only_len);
2023 last_end = *end;
2024 }
2025
2026 effective_length.saturating_sub(total_savings)
2027 }
2028}