rumdl_lib/utils/
code_block_utils.rs1use crate::rules::blockquote_utils::BlockquoteUtils;
5
/// Classification of a code block line relative to its surroundings, as
/// produced by `CodeBlockUtils::analyze_code_block_context`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum CodeBlockContext {
    /// At least one blank line sits directly before or after the line.
    Standalone,
    /// The line's indent is at least the minimum continuation indent it was
    /// compared against.
    Indented,
    /// Neither of the above applies; also the result for an out-of-range line
    /// index.
    Adjacent,
}
16
/// Stateless namespace for code-block detection and classification helpers.
/// Every function in its `impl` is an associated function (no `self`).
pub struct CodeBlockUtils;
19
20impl CodeBlockUtils {
21 pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
23 let mut blocks = Vec::new();
24 let mut in_code_block = false;
25 let mut code_block_start = 0;
26 let mut opening_fence_char = ' ';
27 let mut opening_fence_len = 0;
28
29 let lines: Vec<&str> = content.lines().collect();
31 let mut line_positions = Vec::with_capacity(lines.len());
32 let mut pos = 0;
33 for line in &lines {
34 line_positions.push(pos);
35 pos += line.len() + 1; }
37
38 let mut list_context_indent: Vec<usize> = vec![0; lines.len()];
41 {
42 let mut in_list = false;
43 let mut continuation_indent: usize = 0;
44
45 for (i, line) in lines.iter().enumerate() {
46 let mut line_no_bq = line.to_string();
47 while BlockquoteUtils::is_blockquote(&line_no_bq) {
48 line_no_bq = BlockquoteUtils::extract_content(&line_no_bq);
49 }
50
51 let indent_level = line_no_bq.len() - line_no_bq.trim_start().len();
52 let trimmed = line_no_bq.trim_start();
53
54 let is_ordered = {
56 let first_char = trimmed.chars().next();
57 first_char.is_some_and(|c| c.is_numeric())
58 && trimmed.chars().position(|c| c == '.' || c == ')').is_some_and(|pos| {
59 pos > 0
60 && trimmed[..pos].chars().all(|c| c.is_numeric())
61 && trimmed.chars().nth(pos + 1).is_some_and(|c| c == ' ' || c == '\t')
62 })
63 };
64 let is_list_item =
65 trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") || is_ordered;
66
67 if is_list_item {
68 in_list = true;
69 let marker_width =
70 if trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") {
71 1
72 } else {
73 trimmed.chars().take_while(|c| c.is_numeric()).count() + 1
74 };
75 let after_marker = &trimmed[marker_width..];
76 let spaces_after = after_marker.chars().take_while(|c| *c == ' ' || *c == '\t').count();
77 continuation_indent = indent_level + marker_width + spaces_after;
78 } else if in_list && !line_no_bq.trim().is_empty() && indent_level < continuation_indent {
79 in_list = false;
80 continuation_indent = 0;
81 }
82
83 list_context_indent[i] = if in_list { continuation_indent } else { 0 };
84 }
85 }
86
87 for (i, line) in lines.iter().enumerate() {
89 let line_start = line_positions[i];
90
91 let mut line_without_blockquote = line.to_string();
94 while BlockquoteUtils::is_blockquote(&line_without_blockquote) {
95 line_without_blockquote = BlockquoteUtils::extract_content(&line_without_blockquote);
96 }
97
98 let indent = line_without_blockquote.len() - line_without_blockquote.trim_start().len();
100 let trimmed = line_without_blockquote.trim_start();
101
102 let effective_indent = indent.saturating_sub(list_context_indent[i]);
106 if effective_indent <= 3 && (trimmed.starts_with("```") || trimmed.starts_with("~~~")) {
107 let fence_char = trimmed.chars().next().unwrap();
108 let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
109
110 if !in_code_block && fence_len >= 3 {
111 code_block_start = line_start;
113 in_code_block = true;
114 opening_fence_char = fence_char;
115 opening_fence_len = fence_len;
116 } else if in_code_block && fence_char == opening_fence_char && fence_len >= opening_fence_len {
117 let code_block_end = line_start + line.len();
119 blocks.push((code_block_start, code_block_end));
120 in_code_block = false;
121 opening_fence_char = ' ';
122 opening_fence_len = 0;
123 }
124 }
126 }
127
128 if in_code_block {
130 blocks.push((code_block_start, content.len()));
131 }
132
133 let mut in_indented_block = false;
142 let mut indented_block_start = 0;
143 let mut in_list_context = false;
144 let mut list_continuation_indent: usize = 0;
145
146 for (line_idx, line) in lines.iter().enumerate() {
147 let line_start = if line_idx < line_positions.len() {
148 line_positions[line_idx]
149 } else {
150 0
151 };
152
153 let mut line_without_blockquote = line.to_string();
155 while BlockquoteUtils::is_blockquote(&line_without_blockquote) {
156 line_without_blockquote = BlockquoteUtils::extract_content(&line_without_blockquote);
157 }
158
159 let indent_level = line_without_blockquote.len() - line_without_blockquote.trim_start().len();
161 let is_indented = line_without_blockquote.starts_with(" ") || line_without_blockquote.starts_with("\t");
162
163 let trimmed = line_without_blockquote.trim_start();
165
166 let is_ordered_list = {
169 let mut chars = trimmed.chars();
170 let first_char = chars.next();
171 if !first_char.is_some_and(|c| c.is_numeric()) {
172 false
173 } else {
174 let delimiter_char_pos = trimmed.chars().position(|c| c == '.' || c == ')');
176 match delimiter_char_pos {
177 Some(char_pos) if char_pos > 0 => {
178 let byte_pos = trimmed.char_indices().nth(char_pos).map(|(i, _)| i);
180 if let Some(byte_pos) = byte_pos {
181 let all_digits = trimmed[..byte_pos].chars().all(|c| c.is_numeric());
183 let has_space =
185 trimmed.chars().nth(char_pos + 1).is_some_and(|c| c == ' ' || c == '\t');
186 all_digits && has_space
187 } else {
188 false
189 }
190 }
191 _ => false,
192 }
193 }
194 };
195
196 let is_list_item =
197 trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") || is_ordered_list;
198
199 let prev_line_without_blockquote = if line_idx > 0 {
201 let mut prev = lines[line_idx - 1].to_string();
202 while BlockquoteUtils::is_blockquote(&prev) {
203 prev = BlockquoteUtils::extract_content(&prev);
204 }
205 prev
206 } else {
207 String::new()
208 };
209 let prev_blank = line_idx > 0 && prev_line_without_blockquote.trim().is_empty();
210
211 if is_list_item {
213 in_list_context = true;
215
216 let marker_column = indent_level;
222 let marker_width =
223 if trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") {
224 1 } else {
226 trimmed.chars().take_while(|c| c.is_numeric()).count() + 1
228 };
229
230 let after_marker = &trimmed[marker_width..];
233 let spaces_after_marker = after_marker.chars().take_while(|c| *c == ' ' || *c == '\t').count();
234
235 list_continuation_indent = marker_column + marker_width + spaces_after_marker;
238 } else if in_list_context
239 && !line_without_blockquote.trim().is_empty()
240 && indent_level < list_continuation_indent
241 {
242 in_list_context = false;
244 list_continuation_indent = 0;
245 }
246
247 let is_list_continuation_paragraph = in_list_context
253 && indent_level >= list_continuation_indent
254 && indent_level < (list_continuation_indent + 4);
255
256 let is_code_block_in_list = in_list_context && indent_level >= (list_continuation_indent + 4);
257
258 if is_indented && !line_without_blockquote.trim().is_empty() && !is_list_item {
260 if is_code_block_in_list {
261 if !in_indented_block && prev_blank {
264 in_indented_block = true;
265 indented_block_start = line_start;
266 }
267 } else if !is_list_continuation_paragraph {
268 if !in_indented_block && prev_blank {
270 in_indented_block = true;
271 indented_block_start = line_start;
272 }
273 }
274 } else if in_indented_block {
276 let block_end = if line_idx > 0 && line_idx - 1 < line_positions.len() {
278 line_positions[line_idx - 1] + lines[line_idx - 1].len()
279 } else {
280 line_start
281 };
282 blocks.push((indented_block_start, block_end));
283 in_indented_block = false;
284 }
285 }
286
287 if in_indented_block {
289 blocks.push((indented_block_start, content.len()));
290 }
291
292 blocks.sort_by(|a, b| a.0.cmp(&b.0));
296 blocks
297 }
298
/// Reports whether byte offset `pos` lies inside any of `blocks`.
///
/// Each entry is treated as a half-open range: `start` is inside, `end` is
/// not.
pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
    blocks
        .iter()
        .any(|&(start, end)| (start..end).contains(&pos))
}
304
/// Reports whether byte offset `pos` falls within one of the `blocks` ranges.
///
/// Ranges are half-open: a position equal to a range's `end` is outside it.
pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
    blocks
        .iter()
        .any(|&(start, end)| (start..end).contains(&pos))
}
309
310 pub fn analyze_code_block_context(
313 lines: &[crate::lint_context::LineInfo],
314 line_idx: usize,
315 min_continuation_indent: usize,
316 ) -> CodeBlockContext {
317 if let Some(line_info) = lines.get(line_idx) {
318 if line_info.indent >= min_continuation_indent {
320 return CodeBlockContext::Indented;
321 }
322
323 let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
325
326 if prev_blanks > 0 || next_blanks > 0 {
329 return CodeBlockContext::Standalone;
330 }
331
332 CodeBlockContext::Adjacent
334 } else {
335 CodeBlockContext::Adjacent
337 }
338 }
339
340 fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
342 let mut prev_blanks = 0;
343 let mut next_blanks = 0;
344
345 for i in (0..line_idx).rev() {
347 if let Some(line) = lines.get(i) {
348 if line.is_blank {
349 prev_blanks += 1;
350 } else {
351 break;
352 }
353 } else {
354 break;
355 }
356 }
357
358 for i in (line_idx + 1)..lines.len() {
360 if let Some(line) = lines.get(i) {
361 if line.is_blank {
362 next_blanks += 1;
363 } else {
364 break;
365 }
366 } else {
367 break;
368 }
369 }
370
371 (prev_blanks, next_blanks)
372 }
373
374 pub fn calculate_min_continuation_indent(
377 content: &str,
378 lines: &[crate::lint_context::LineInfo],
379 current_line_idx: usize,
380 ) -> usize {
381 for i in (0..current_line_idx).rev() {
383 if let Some(line_info) = lines.get(i) {
384 if let Some(list_item) = &line_info.list_item {
385 return if list_item.is_ordered {
387 list_item.marker_column + list_item.marker.len() + 1 } else {
389 list_item.marker_column + 2 };
391 }
392
393 if line_info.heading.is_some() || Self::is_structural_separator(line_info.content(content)) {
395 break;
396 }
397 }
398 }
399
400 0 }
402
/// Heuristic for lines that end a backwards search for a list item.
///
/// After trimming surrounding whitespace, a line qualifies when it starts
/// with `---`, `***`, `___` or `>`, or when it contains a `|` anywhere.
fn is_structural_separator(content: &str) -> bool {
    const LEADING_MARKERS: [&str; 4] = ["---", "***", "___", ">"];

    let text = content.trim();
    text.contains('|') || LEADING_MARKERS.iter().any(|marker| text.starts_with(*marker))
}
412}
413
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the detector under test.
    fn detect(markdown: &str) -> Vec<(usize, usize)> {
        CodeBlockUtils::detect_code_blocks(markdown)
    }

    /// Returns the text a detected range covers.
    fn covered<'a>(markdown: &'a str, &(start, end): &(usize, usize)) -> &'a str {
        &markdown[start..end]
    }

    #[test]
    fn test_detect_fenced_code_blocks() {
        // Backtick fence.
        let backticks = "Some text\n```\ncode here\n```\nMore text";
        let found = detect(backticks);
        assert_eq!(found.len(), 1);
        assert!(found
            .iter()
            .any(|block| block.1 - block.0 > 10 && covered(backticks, block).contains("code here")));

        // Tilde fence: the range spans exactly fence-to-fence.
        let tildes = "Some text\n~~~\ncode here\n~~~\nMore text";
        let found = detect(tildes);
        assert_eq!(found.len(), 1);
        assert_eq!(covered(tildes, &found[0]), "~~~\ncode here\n~~~");

        // Two separate blocks with different fence characters.
        let both = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
        assert_eq!(detect(both).len(), 2);
    }

    #[test]
    fn test_detect_code_blocks_with_language() {
        let markdown = "Text\n```rust\nfn main() {}\n```\nMore";
        let found = detect(markdown);
        assert_eq!(found.len(), 1);
        assert!(found.iter().any(|block| covered(markdown, block).contains("fn main")));
    }

    #[test]
    fn test_unclosed_code_block() {
        let markdown = "Text\n```\ncode here\nno closing fence";
        let found = detect(markdown);
        assert_eq!(found.len(), 1);
        // An unclosed fence runs to the end of the input.
        assert_eq!(found[0].1, markdown.len());
    }

    #[test]
    fn test_indented_code_blocks() {
        let spaces = "Paragraph\n\n code line 1\n code line 2\n\nMore text";
        let found = detect(spaces);
        assert_eq!(found.len(), 1);
        let block_text = covered(spaces, &found[0]);
        assert!(block_text.contains("code line 1"));
        assert!(block_text.contains("code line 2"));

        let tabs = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
        assert_eq!(detect(tabs).len(), 1);
    }

    #[test]
    fn test_indented_code_requires_blank_line() {
        // No blank line before the indented text: not a block.
        let without_blank = "Paragraph\n indented but not code\nMore text";
        assert_eq!(detect(without_blank).len(), 0);

        // With a blank line in between it is detected.
        let with_blank = "Paragraph\n\n now it's code\nMore text";
        assert_eq!(detect(with_blank).len(), 1);
    }

    #[test]
    fn test_list_items_not_code_blocks() {
        let bullets = "List:\n\n - Item 1\n - Item 2\n * Item 3\n + Item 4";
        assert_eq!(detect(bullets).len(), 0);

        let ordered = "List:\n\n 1. First\n 2. Second\n 1) Also first";
        assert_eq!(detect(ordered).len(), 0);
    }

    #[test]
    fn test_inline_code_spans_not_detected() {
        let samples = [
            "Text with `inline code` here",
            "Text with ``code with ` backtick`` here",
            "Has `code1` and `code2` spans",
        ];
        for markdown in samples {
            assert_eq!(detect(markdown).len(), 0);
        }
    }

    #[test]
    fn test_unclosed_code_span() {
        let samples = [
            "Text with `unclosed code span",
            "Text with ``one style` different close",
        ];
        for markdown in samples {
            assert_eq!(detect(markdown).len(), 0);
        }
    }

    #[test]
    fn test_mixed_code_blocks_and_spans() {
        let markdown = "Has `span1` text\n```\nblock\n```\nand `span2`";
        let found = detect(markdown);
        assert_eq!(found.len(), 1);

        assert!(found.iter().any(|block| covered(markdown, block).contains("block")));
        // Inline spans must never be reported as blocks.
        assert!(found.iter().all(|block| covered(markdown, block) != "`span1`"));
        assert!(found.iter().all(|block| covered(markdown, block) != "`span2`"));
    }

    #[test]
    fn test_is_in_code_block_or_span() {
        let ranges = vec![(10, 20), (30, 40), (50, 60)];

        // Interior positions, plus the inclusive start boundary.
        for inside in [15, 35, 55, 10] {
            assert!(CodeBlockUtils::is_in_code_block_or_span(&ranges, inside));
        }

        // The exclusive end boundary and positions between or outside ranges.
        for outside in [20, 5, 25, 65] {
            assert!(!CodeBlockUtils::is_in_code_block_or_span(&ranges, outside));
        }
    }

    #[test]
    fn test_empty_content() {
        assert_eq!(detect("").len(), 0);
    }

    #[test]
    fn test_code_block_at_start() {
        let markdown = "```\ncode\n```\nText after";
        let found = detect(markdown);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].0, 0);
    }

    #[test]
    fn test_code_block_at_end() {
        let markdown = "Text before\n```\ncode\n```";
        let found = detect(markdown);
        assert_eq!(found.len(), 1);
        assert!(found.iter().any(|block| covered(markdown, block).contains("code")));
    }

    #[test]
    fn test_nested_fence_markers() {
        // A shorter fence inside a longer one is content, not a terminator.
        let markdown = "Text\n````\n```\nnested\n```\n````\nAfter";
        let found = detect(markdown);
        assert!(!found.is_empty());
        assert!(found.iter().any(|block| covered(markdown, block).contains("nested")));
    }

    #[test]
    fn test_indented_code_with_blank_lines() {
        let markdown = "Text\n\n line1\n\n line2\n\nAfter";
        let found = detect(markdown);
        assert!(!found.is_empty());
        let combined: String = found.iter().map(|block| covered(markdown, block)).collect();
        assert!(combined.contains("line1"));
    }

    #[test]
    fn test_code_span_with_spaces() {
        let markdown = "Text ` code with spaces ` more";
        assert_eq!(detect(markdown).len(), 0);
    }

    #[test]
    fn test_fenced_block_with_info_string() {
        let markdown = "```rust,no_run,should_panic\ncode\n```";
        let found = detect(markdown);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].0, 0);
    }

    #[test]
    fn test_indented_fences_not_code_blocks() {
        // The fences here are indented; exactly one block is expected.
        let markdown = "Text\n ```\n code\n ```\nAfter";
        assert_eq!(detect(markdown).len(), 1);
    }
}