1use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
12
/// Pair of range lists produced by [`CodeBlockUtils::detect_code_blocks_and_spans`].
///
/// The first vector holds code block ranges and the second holds inline code
/// span ranges. Every entry is a `(start, end)` byte offset pair into the
/// scanned content, with `end` treated as exclusive by the lookup helpers.
pub type CodeRanges = (Vec<(usize, usize)>, Vec<(usize, usize)>);
15
/// How a line relates to its surroundings, as reported by
/// [`CodeBlockUtils::analyze_code_block_context`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockContext {
    /// The line is bordered by at least one blank line, above or below.
    Standalone,
    /// The line's indentation reaches the minimum continuation indent.
    Indented,
    /// The line touches non-blank lines on both sides, or the requested
    /// line index does not exist.
    Adjacent,
}
26
/// Namespace for code block and code span detection helpers.
///
/// The type carries no state; every helper is an associated function.
#[derive(Debug, Clone, Copy)]
pub struct CodeBlockUtils;
29
30impl CodeBlockUtils {
31 pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
41 let (blocks, _) = Self::detect_code_blocks_and_spans(content);
42 blocks
43 }
44
45 pub fn detect_code_blocks_and_spans(content: &str) -> CodeRanges {
47 let mut blocks = Vec::new();
48 let mut spans = Vec::new();
49 let mut code_block_start: Option<usize> = None;
50
51 let options = Options::all();
53 let parser = Parser::new_ext(content, options).into_offset_iter();
54
55 for (event, range) in parser {
56 match event {
57 Event::Start(Tag::CodeBlock(_)) => {
58 code_block_start = Some(range.start);
60 }
61 Event::End(TagEnd::CodeBlock) => {
62 if let Some(start) = code_block_start.take() {
64 blocks.push((start, range.end));
65 }
66 }
67 Event::Code(_) => {
68 spans.push((range.start, range.end));
69 }
70 _ => {}
71 }
72 }
73
74 if let Some(start) = code_block_start {
77 blocks.push((start, content.len()));
78 }
79
80 blocks.sort_by_key(|&(start, _)| start);
82 (blocks, spans)
83 }
84
85 pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
87 blocks.iter().any(|&(start, end)| pos >= start && pos < end)
89 }
90
91 pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
93 blocks.iter().any(|&(start, end)| pos >= start && pos < end)
94 }
95
96 pub fn analyze_code_block_context(
99 lines: &[crate::lint_context::LineInfo],
100 line_idx: usize,
101 min_continuation_indent: usize,
102 ) -> CodeBlockContext {
103 if let Some(line_info) = lines.get(line_idx) {
104 if line_info.indent >= min_continuation_indent {
106 return CodeBlockContext::Indented;
107 }
108
109 let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
111
112 if prev_blanks > 0 || next_blanks > 0 {
115 return CodeBlockContext::Standalone;
116 }
117
118 CodeBlockContext::Adjacent
120 } else {
121 CodeBlockContext::Adjacent
123 }
124 }
125
126 fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
128 let mut prev_blanks = 0;
129 let mut next_blanks = 0;
130
131 for i in (0..line_idx).rev() {
133 if let Some(line) = lines.get(i) {
134 if line.is_blank {
135 prev_blanks += 1;
136 } else {
137 break;
138 }
139 } else {
140 break;
141 }
142 }
143
144 for i in (line_idx + 1)..lines.len() {
146 if let Some(line) = lines.get(i) {
147 if line.is_blank {
148 next_blanks += 1;
149 } else {
150 break;
151 }
152 } else {
153 break;
154 }
155 }
156
157 (prev_blanks, next_blanks)
158 }
159
160 pub fn calculate_min_continuation_indent(
163 content: &str,
164 lines: &[crate::lint_context::LineInfo],
165 current_line_idx: usize,
166 ) -> usize {
167 for i in (0..current_line_idx).rev() {
169 if let Some(line_info) = lines.get(i) {
170 if let Some(list_item) = &line_info.list_item {
171 return if list_item.is_ordered {
173 list_item.marker_column + list_item.marker.len() + 1 } else {
175 list_item.marker_column + 2 };
177 }
178
179 if line_info.heading.is_some() || Self::is_structural_separator(line_info.content(content)) {
181 break;
182 }
183 }
184 }
185
186 0 }
188
189 fn is_structural_separator(content: &str) -> bool {
191 let trimmed = content.trim();
192 trimmed.starts_with("---")
193 || trimmed.starts_with("***")
194 || trimmed.starts_with("___")
195 || crate::utils::skip_context::is_table_line(trimmed)
196 || trimmed.starts_with(">") }
198
199 pub fn detect_markdown_code_blocks(content: &str) -> Vec<MarkdownCodeBlock> {
207 use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
208
209 let mut blocks = Vec::new();
210 let mut current_block: Option<MarkdownCodeBlockBuilder> = None;
211
212 let options = Options::all();
213 let parser = Parser::new_ext(content, options).into_offset_iter();
214
215 for (event, range) in parser {
216 match event {
217 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
218 let language = info.split_whitespace().next().unwrap_or("");
220 if language.eq_ignore_ascii_case("markdown") || language.eq_ignore_ascii_case("md") {
221 let block_start = range.start;
223 let content_start = content[block_start..]
224 .find('\n')
225 .map(|i| block_start + i + 1)
226 .unwrap_or(content.len());
227
228 current_block = Some(MarkdownCodeBlockBuilder { content_start });
229 }
230 }
231 Event::End(TagEnd::CodeBlock) => {
232 if let Some(builder) = current_block.take() {
233 let block_end = range.end;
235
236 if builder.content_start > block_end || builder.content_start > content.len() {
238 continue;
239 }
240
241 let search_range = &content[builder.content_start..block_end.min(content.len())];
242 let content_end = search_range
243 .rfind('\n')
244 .map(|i| builder.content_start + i)
245 .unwrap_or(builder.content_start);
246
247 if content_end >= builder.content_start {
249 blocks.push(MarkdownCodeBlock {
250 content_start: builder.content_start,
251 content_end,
252 });
253 }
254 }
255 }
256 _ => {}
257 }
258 }
259
260 blocks
261 }
262}
263
/// Byte range of the text inside a fenced code block tagged as markdown,
/// as produced by [`CodeBlockUtils::detect_markdown_code_blocks`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownCodeBlock {
    /// Byte offset of the first content byte, just after the opening fence line.
    pub content_start: usize,
    /// Byte offset one past the last content byte; never less than `content_start`.
    pub content_end: usize,
}
272
/// State kept for a markdown code block between its start and end events.
struct MarkdownCodeBlockBuilder {
    /// Byte offset at which the block's content begins.
    content_start: usize,
}
277
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_detect_fenced_code_blocks() {
        // Backtick fence.
        let content = "Some text\n```\ncode here\n```\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let fenced_block = blocks
            .iter()
            .find(|(start, end)| end - start > 10 && content[*start..*end].contains("code here"));
        assert!(fenced_block.is_some());

        // Tilde fence: the range covers both fence lines and nothing else.
        let content = "Some text\n~~~\ncode here\n~~~\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(&content[blocks[0].0..blocks[0].1], "~~~\ncode here\n~~~");

        // Two separate blocks with different fence styles.
        let content = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 2);
    }

    #[test]
    fn test_detect_code_blocks_with_language() {
        let content = "Text\n```rust\nfn main() {}\n```\nMore";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("fn main"));
        assert!(fenced.is_some());
    }

    #[test]
    fn test_unclosed_code_block() {
        // An unclosed fence runs to the end of the document.
        let content = "Text\n```\ncode here\nno closing fence";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].1, content.len());
    }

    #[test]
    fn test_indented_code_blocks() {
        // Indented code needs at least four leading spaces.
        let content = "Paragraph\n\n    code line 1\n    code line 2\n\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("code line 1"));
        assert!(content[blocks[0].0..blocks[0].1].contains("code line 2"));

        // A tab also qualifies as code indentation.
        let content = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_code_requires_blank_line() {
        // Without a blank line the indented text continues the paragraph.
        let content = "Paragraph\n    indented but not code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // With a blank line in between it becomes an indented code block.
        let content = "Paragraph\n\n    now it's code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_content_with_list_markers_is_code_block() {
        // Four-space indentation wins over the list markers inside it.
        let content = "List:\n\n    - Item 1\n    - Item 2\n    * Item 3\n    + Item 4";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let content = "List:\n\n    1. First\n    2. Second";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_actual_list_items_not_code_blocks() {
        let content = "- Item 1\n- Item 2\n* Item 3";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // A nested item is indented under its parent but is still a list item.
        let content = "- Item 1\n  - Nested item\n- Item 2";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_inline_code_spans_not_detected() {
        // Inline spans are reported separately and never appear as blocks.
        let content = "Text with `inline code` here";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        let content = "Text with ``code with ` backtick`` here";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        let content = "Has `code1` and `code2` spans";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_unclosed_code_span() {
        let content = "Text with `unclosed code span";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // Opening and closing backtick runs of different lengths do not pair up.
        let content = "Text with ``one style` different close";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_mixed_code_blocks_and_spans() {
        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        assert!(blocks.iter().any(|(s, e)| content[*s..*e].contains("block")));
        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span1`"));
        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span2`"));
    }

    #[test]
    fn test_is_in_code_block_or_span() {
        let blocks = vec![(10, 20), (30, 40), (50, 60)];

        // Positions inside a range.
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 15));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 35));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 55));

        // Boundaries: the start is inclusive, the end is exclusive.
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 10));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 20));

        // Positions outside every range.
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 5));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 25));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 65));
    }

    #[test]
    fn test_empty_content() {
        let blocks = CodeBlockUtils::detect_code_blocks("");
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_code_block_at_start() {
        let content = "```\ncode\n```\nText after";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    #[test]
    fn test_code_block_at_end() {
        let content = "Text before\n```\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("code"));
        assert!(fenced.is_some());
    }

    #[test]
    fn test_nested_fence_markers() {
        // A four-backtick fence may contain a three-backtick fence as text.
        let content = "Text\n````\n```\nnested\n```\n````\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty());

        let outer = blocks.iter().find(|(s, e)| content[*s..*e].contains("nested"));
        assert!(outer.is_some());
    }

    #[test]
    fn test_indented_code_with_blank_lines() {
        let content = "Text\n\n    line1\n\n    line2\n\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty());

        let all_content: String = blocks
            .iter()
            .map(|(s, e)| &content[*s..*e])
            .collect::<Vec<_>>()
            .join("");
        assert!(all_content.contains("line1") || content[blocks[0].0..blocks[0].1].contains("line1"));
    }

    #[test]
    fn test_code_span_with_spaces() {
        let content = "Text ` code with spaces ` more";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_fenced_block_with_info_string() {
        let content = "```rust,no_run,should_panic\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    #[test]
    fn test_indented_fences_not_code_blocks() {
        let content = "Text\n ```\n code\n ```\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_backticks_in_info_string_not_code_block() {
        // The first line has backticks after the opening run, so it cannot open
        // a backtick fence; only the `bash` block counts.
        let content = "```something```\n\n```bash\n# comment\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("# comment"));
    }

    #[test]
    fn test_issue_175_reproduction() {
        let content = "```something```\n\n```bash\n# Have a parrot\necho \"🦜\"\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("Have a parrot"));
    }

    #[test]
    fn test_tilde_fence_allows_tildes_in_info_string() {
        let content = "~~~abc~~~\ncode content\n~~~";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_nested_longer_fence_contains_shorter() {
        let content = "````\n```\nnested content\n```\n````";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("nested content"));
    }

    #[test]
    fn test_mixed_fence_types() {
        // A backtick line inside a tilde fence is plain content.
        let content = "~~~\n```\nmixed content\n~~~";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("mixed content"));
    }

    #[test]
    fn test_indented_code_in_list_issue_276() {
        // The code lines are indented four spaces beyond the list item's
        // content column, which makes them an indented code block in the item.
        let content = r#"1. First item
2. Second item with code:

        # This is a code block in a list
        print("Hello, world!")

4. Third item"#;

        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty(), "Should detect indented code block inside list");

        let all_content: String = blocks
            .iter()
            .map(|(s, e)| &content[*s..*e])
            .collect::<Vec<_>>()
            .join("");
        assert!(
            all_content.contains("code block in a list") || all_content.contains("print"),
            "Detected block should contain the code content: {all_content:?}"
        );
    }

    #[test]
    fn test_detect_markdown_code_blocks() {
        let content = r#"# Example

```markdown
# Heading
Content here
```

```md
Another heading
More content
```

```rust
// Not markdown
fn main() {}
```
"#;

        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);

        // Only the `markdown` and `md` blocks qualify.
        assert_eq!(
            blocks.len(),
            2,
            "Should detect exactly 2 markdown blocks, got {blocks:?}"
        );

        let first = &blocks[0];
        let first_content = &content[first.content_start..first.content_end];
        assert!(
            first_content.contains("# Heading"),
            "First block should contain '# Heading', got: {first_content:?}"
        );

        let second = &blocks[1];
        let second_content = &content[second.content_start..second.content_end];
        assert!(
            second_content.contains("Another heading"),
            "Second block should contain 'Another heading', got: {second_content:?}"
        );
    }

    #[test]
    fn test_detect_markdown_code_blocks_empty() {
        let content = "# Just a heading\n\nNo code blocks here\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_detect_markdown_code_blocks_case_insensitive() {
        let content = "```MARKDOWN\nContent\n```\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_detect_markdown_code_blocks_at_eof_no_trailing_newline() {
        let content = "# Doc\n\n```markdown\nContent\n```";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
        assert!(block_content.contains("Content"));
    }

    #[test]
    fn test_detect_markdown_code_blocks_single_line_content() {
        let content = "```markdown\nX\n```\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
        assert_eq!(block_content, "X");
    }

    #[test]
    fn test_detect_markdown_code_blocks_empty_content() {
        let content = "```markdown\n```\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        // An empty block may or may not be reported; if it is, the range must be valid.
        if !blocks.is_empty() {
            assert!(blocks[0].content_start <= blocks[0].content_end);
        }
    }

    #[test]
    fn test_detect_markdown_code_blocks_validates_ranges() {
        // Edge cases that must never yield an inverted or out-of-bounds range.
        let test_cases = [
            "",
            "```markdown",
            "```markdown\n",
            "```\n```",
            "```markdown\n```",
            " ```markdown\n X\n ```",
        ];

        for content in test_cases {
            let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
            for block in &blocks {
                assert!(
                    block.content_start <= block.content_end,
                    "Invalid range in content: {content:?}"
                );
                assert!(
                    block.content_end <= content.len(),
                    "Range exceeds content length in: {content:?}"
                );
            }
        }
    }
}