1use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
12
/// How a line relates to the content around it, as classified by
/// `CodeBlockUtils::analyze_code_block_context`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockContext {
    /// At least one blank line sits directly before or directly after the line.
    Standalone,
    /// The line's indentation reaches the minimum continuation indent supplied by the caller.
    Indented,
    /// Neither of the above: the line touches non-blank neighbours on both sides.
    /// Also returned when the requested line index is out of range.
    Adjacent,
}
23
/// Stateless namespace type: all functionality is exposed as associated functions,
/// so no instance data is carried.
pub struct CodeBlockUtils;
26
27impl CodeBlockUtils {
28 pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
38 let mut blocks = Vec::new();
39 let mut code_block_start: Option<usize> = None;
40
41 let options = Options::all();
43 let parser = Parser::new_ext(content, options).into_offset_iter();
44
45 for (event, range) in parser {
46 match event {
47 Event::Start(Tag::CodeBlock(_)) => {
48 code_block_start = Some(range.start);
50 }
51 Event::End(TagEnd::CodeBlock) => {
52 if let Some(start) = code_block_start.take() {
54 blocks.push((start, range.end));
55 }
56 }
57 _ => {}
58 }
59 }
60
61 if let Some(start) = code_block_start {
64 blocks.push((start, content.len()));
65 }
66
67 blocks.sort_by_key(|&(start, _)| start);
69 blocks
70 }
71
72 pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
74 blocks.iter().any(|&(start, end)| pos >= start && pos < end)
76 }
77
78 pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
80 blocks.iter().any(|&(start, end)| pos >= start && pos < end)
81 }
82
83 pub fn analyze_code_block_context(
86 lines: &[crate::lint_context::LineInfo],
87 line_idx: usize,
88 min_continuation_indent: usize,
89 ) -> CodeBlockContext {
90 if let Some(line_info) = lines.get(line_idx) {
91 if line_info.indent >= min_continuation_indent {
93 return CodeBlockContext::Indented;
94 }
95
96 let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
98
99 if prev_blanks > 0 || next_blanks > 0 {
102 return CodeBlockContext::Standalone;
103 }
104
105 CodeBlockContext::Adjacent
107 } else {
108 CodeBlockContext::Adjacent
110 }
111 }
112
113 fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
115 let mut prev_blanks = 0;
116 let mut next_blanks = 0;
117
118 for i in (0..line_idx).rev() {
120 if let Some(line) = lines.get(i) {
121 if line.is_blank {
122 prev_blanks += 1;
123 } else {
124 break;
125 }
126 } else {
127 break;
128 }
129 }
130
131 for i in (line_idx + 1)..lines.len() {
133 if let Some(line) = lines.get(i) {
134 if line.is_blank {
135 next_blanks += 1;
136 } else {
137 break;
138 }
139 } else {
140 break;
141 }
142 }
143
144 (prev_blanks, next_blanks)
145 }
146
147 pub fn calculate_min_continuation_indent(
150 content: &str,
151 lines: &[crate::lint_context::LineInfo],
152 current_line_idx: usize,
153 ) -> usize {
154 for i in (0..current_line_idx).rev() {
156 if let Some(line_info) = lines.get(i) {
157 if let Some(list_item) = &line_info.list_item {
158 return if list_item.is_ordered {
160 list_item.marker_column + list_item.marker.len() + 1 } else {
162 list_item.marker_column + 2 };
164 }
165
166 if line_info.heading.is_some() || Self::is_structural_separator(line_info.content(content)) {
168 break;
169 }
170 }
171 }
172
173 0 }
175
176 fn is_structural_separator(content: &str) -> bool {
178 let trimmed = content.trim();
179 trimmed.starts_with("---")
180 || trimmed.starts_with("***")
181 || trimmed.starts_with("___")
182 || crate::utils::skip_context::is_table_line(trimmed)
183 || trimmed.starts_with(">") }
185
186 pub fn detect_markdown_code_blocks(content: &str) -> Vec<MarkdownCodeBlock> {
194 use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
195
196 let mut blocks = Vec::new();
197 let mut current_block: Option<MarkdownCodeBlockBuilder> = None;
198
199 let options = Options::all();
200 let parser = Parser::new_ext(content, options).into_offset_iter();
201
202 for (event, range) in parser {
203 match event {
204 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
205 let language = info.split_whitespace().next().unwrap_or("");
207 if language.eq_ignore_ascii_case("markdown") || language.eq_ignore_ascii_case("md") {
208 let block_start = range.start;
210 let content_start = content[block_start..]
211 .find('\n')
212 .map(|i| block_start + i + 1)
213 .unwrap_or(content.len());
214
215 current_block = Some(MarkdownCodeBlockBuilder { content_start });
216 }
217 }
218 Event::End(TagEnd::CodeBlock) => {
219 if let Some(builder) = current_block.take() {
220 let block_end = range.end;
222
223 if builder.content_start > block_end || builder.content_start > content.len() {
225 continue;
226 }
227
228 let search_range = &content[builder.content_start..block_end.min(content.len())];
229 let content_end = search_range
230 .rfind('\n')
231 .map(|i| builder.content_start + i)
232 .unwrap_or(builder.content_start);
233
234 if content_end >= builder.content_start {
236 blocks.push(MarkdownCodeBlock {
237 content_start: builder.content_start,
238 content_end,
239 });
240 }
241 }
242 }
243 _ => {}
244 }
245 }
246
247 blocks
248 }
249}
250
/// Byte range of the content of a `markdown`/`md` fenced code block, as produced
/// by `CodeBlockUtils::detect_markdown_code_blocks`.
#[derive(Debug, Clone)]
pub struct MarkdownCodeBlock {
    /// Byte offset where the block's content begins (the line after the opening fence).
    pub content_start: usize,
    /// Byte offset where the block's content ends (exclusive when used as a slice bound).
    pub content_end: usize,
}
259
/// In-progress record for a markdown fenced block whose end has not been seen yet.
struct MarkdownCodeBlockBuilder {
    /// Byte offset where the block's content begins.
    content_start: usize,
}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the slice of `content` covered by `block`.
    fn text_of<'a>(content: &'a str, block: &(usize, usize)) -> &'a str {
        &content[block.0..block.1]
    }

    /// Concatenates the text of every detected block.
    fn joined_text(content: &str, blocks: &[(usize, usize)]) -> String {
        blocks.iter().map(|block| text_of(content, block)).collect()
    }

    #[test]
    fn test_detect_fenced_code_blocks() {
        // Backtick fence.
        let content = "Some text\n```\ncode here\n```\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let fenced_block = blocks
            .iter()
            .find(|(start, end)| end - start > 10 && content[*start..*end].contains("code here"));
        assert!(fenced_block.is_some());

        // Tilde fence.
        let content = "Some text\n~~~\ncode here\n~~~\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(text_of(content, &blocks[0]), "~~~\ncode here\n~~~");

        // Two blocks, one of each fence type.
        let content = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 2);
    }

    #[test]
    fn test_detect_code_blocks_with_language() {
        let content = "Text\n```rust\nfn main() {}\n```\nMore";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(blocks.iter().any(|block| text_of(content, block).contains("fn main")));
    }

    #[test]
    fn test_unclosed_code_block() {
        let content = "Text\n```\ncode here\nno closing fence";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].1, content.len());
    }

    #[test]
    fn test_indented_code_blocks() {
        // Four-space indentation after a blank line forms an indented code block.
        let content = "Paragraph\n\n    code line 1\n    code line 2\n\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(text_of(content, &blocks[0]).contains("code line 1"));
        assert!(text_of(content, &blocks[0]).contains("code line 2"));

        // Tab indentation works as well.
        let content = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_code_requires_blank_line() {
        // Without a blank line the indented text merely continues the paragraph.
        let content = "Paragraph\n    indented but not code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(blocks.is_empty());

        // With a blank line it becomes an indented code block.
        let content = "Paragraph\n\n    now it's code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_content_with_list_markers_is_code_block() {
        // Four-space indented lines are code even when they look like list items.
        let content = "List:\n\n    - Item 1\n    - Item 2\n    * Item 3\n    + Item 4";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let content = "List:\n\n    1. First\n    2. Second";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_actual_list_items_not_code_blocks() {
        let content = "- Item 1\n- Item 2\n* Item 3";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(blocks.is_empty());

        // A nested item (two-space indent) is still a list item, not code.
        let content = "- Item 1\n  - Nested item\n- Item 2";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(blocks.is_empty());
    }

    #[test]
    fn test_inline_code_spans_not_detected() {
        let samples = [
            "Text with `inline code` here",
            "Text with ``code with ` backtick`` here",
            "Has `code1` and `code2` spans",
        ];

        for content in samples {
            let blocks = CodeBlockUtils::detect_code_blocks(content);
            assert!(blocks.is_empty(), "unexpected block in {content:?}");
        }
    }

    #[test]
    fn test_unclosed_code_span() {
        let content = "Text with `unclosed code span";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(blocks.is_empty());

        let content = "Text with ``one style` different close";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(blocks.is_empty());
    }

    #[test]
    fn test_mixed_code_blocks_and_spans() {
        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        assert!(blocks.iter().any(|block| text_of(content, block).contains("block")));
        assert!(blocks.iter().all(|block| text_of(content, block) != "`span1`"));
        assert!(blocks.iter().all(|block| text_of(content, block) != "`span2`"));
    }

    #[test]
    fn test_is_in_code_block_or_span() {
        let blocks = vec![(10, 20), (30, 40), (50, 60)];

        // Inside a range.
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 15));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 35));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 55));

        // Boundaries: start is inclusive, end is exclusive.
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 10));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 20));

        // Outside every range.
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 5));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 25));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 65));
    }

    #[test]
    fn test_is_in_code_block() {
        let blocks = vec![(10, 20), (30, 40)];

        assert!(CodeBlockUtils::is_in_code_block(&blocks, 10));
        assert!(CodeBlockUtils::is_in_code_block(&blocks, 39));
        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 20));
        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 0));
        assert!(!CodeBlockUtils::is_in_code_block(&[], 0));

        // Both entry points must always agree.
        for pos in 0..50 {
            assert_eq!(
                CodeBlockUtils::is_in_code_block(&blocks, pos),
                CodeBlockUtils::is_in_code_block_or_span(&blocks, pos),
                "mismatch at position {pos}"
            );
        }
    }

    #[test]
    fn test_empty_content() {
        let blocks = CodeBlockUtils::detect_code_blocks("");
        assert!(blocks.is_empty());
    }

    #[test]
    fn test_code_block_at_start() {
        let content = "```\ncode\n```\nText after";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    #[test]
    fn test_code_block_at_end() {
        let content = "Text before\n```\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(blocks.iter().any(|block| text_of(content, block).contains("code")));
    }

    #[test]
    fn test_nested_fence_markers() {
        let content = "Text\n````\n```\nnested\n```\n````\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty());
        assert!(blocks.iter().any(|block| text_of(content, block).contains("nested")));
    }

    #[test]
    fn test_indented_code_with_blank_lines() {
        // Blank lines between indented chunks do not have to end the block.
        let content = "Text\n\n    line1\n\n    line2\n\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty());
        assert!(joined_text(content, &blocks).contains("line1"));
    }

    #[test]
    fn test_code_span_with_spaces() {
        let content = "Text ` code with spaces ` more";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(blocks.is_empty());
    }

    #[test]
    fn test_fenced_block_with_info_string() {
        let content = "```rust,no_run,should_panic\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    #[test]
    fn test_indented_fences_not_code_blocks() {
        // NOTE(review): fixture kept as found (single-space indent); a fence indented by
        // fewer than four spaces still opens a fenced block, which matches the assertion.
        let content = "Text\n ```\n code\n ```\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_backticks_in_info_string_not_code_block() {
        // The first line is not a valid fence opener, so only the bash block counts.
        let content = "```something```\n\n```bash\n# comment\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(text_of(content, &blocks[0]).contains("# comment"));
    }

    #[test]
    fn test_issue_175_reproduction() {
        let content = "```something```\n\n```bash\n# Have a parrot\necho \"🦜\"\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(text_of(content, &blocks[0]).contains("Have a parrot"));
    }

    #[test]
    fn test_tilde_fence_allows_tildes_in_info_string() {
        let content = "~~~abc~~~\ncode content\n~~~";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_nested_longer_fence_contains_shorter() {
        let content = "````\n```\nnested content\n```\n````";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(text_of(content, &blocks[0]).contains("nested content"));
    }

    #[test]
    fn test_mixed_fence_types() {
        let content = "~~~\n```\nmixed content\n~~~";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(text_of(content, &blocks[0]).contains("mixed content"));
    }

    #[test]
    fn test_indented_code_in_list_issue_276() {
        // The code lines are indented far enough (list content column + 4) to form an
        // indented code block inside the second list item.
        let content = concat!(
            "1. First item\n",
            "2. Second item with code:\n",
            "\n",
            "        # This is a code block in a list\n",
            "        print(\"Hello, world!\")\n",
            "\n",
            "4. Third item",
        );

        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty(), "Should detect indented code block inside list");

        let all_content = joined_text(content, &blocks);
        assert!(
            all_content.contains("code block in a list") || all_content.contains("print"),
            "Detected block should contain the code content: {all_content:?}"
        );
    }

    #[test]
    fn test_detect_markdown_code_blocks() {
        let content = concat!(
            "# Example\n",
            "\n",
            "```markdown\n",
            "# Heading\n",
            "Content here\n",
            "```\n",
            "\n",
            "```md\n",
            "Another heading\n",
            "More content\n",
            "```\n",
            "\n",
            "```rust\n",
            "// Not markdown\n",
            "fn main() {}\n",
            "```\n",
        );

        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);

        assert_eq!(
            blocks.len(),
            2,
            "Should detect exactly 2 markdown blocks, got {blocks:?}"
        );

        let first = &blocks[0];
        let first_content = &content[first.content_start..first.content_end];
        assert!(
            first_content.contains("# Heading"),
            "First block should contain '# Heading', got: {first_content:?}"
        );

        let second = &blocks[1];
        let second_content = &content[second.content_start..second.content_end];
        assert!(
            second_content.contains("Another heading"),
            "Second block should contain 'Another heading', got: {second_content:?}"
        );
    }

    #[test]
    fn test_detect_markdown_code_blocks_empty() {
        let content = "# Just a heading\n\nNo code blocks here\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert!(blocks.is_empty());
    }

    #[test]
    fn test_detect_markdown_code_blocks_case_insensitive() {
        let content = "```MARKDOWN\nContent\n```\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_detect_markdown_code_blocks_at_eof_no_trailing_newline() {
        let content = "# Doc\n\n```markdown\nContent\n```";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
        assert!(block_content.contains("Content"));
    }

    #[test]
    fn test_detect_markdown_code_blocks_single_line_content() {
        let content = "```markdown\nX\n```\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
        assert_eq!(block_content, "X");
    }

    #[test]
    fn test_detect_markdown_code_blocks_empty_content() {
        let content = "```markdown\n```\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        // An empty block may or may not be reported; if it is, its range must be valid.
        if let Some(block) = blocks.first() {
            assert!(block.content_start <= block.content_end);
        }
    }

    #[test]
    fn test_detect_markdown_code_blocks_validates_ranges() {
        let test_cases = [
            "",
            "```markdown",
            "```markdown\n",
            "```\n```",
            "```markdown\n```",
            " ```markdown\n X\n ```",
        ];

        for content in test_cases {
            let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
            for block in &blocks {
                assert!(
                    block.content_start <= block.content_end,
                    "Invalid range in content: {content:?}"
                );
                assert!(
                    block.content_end <= content.len(),
                    "Range exceeds content length in: {content:?}"
                );
            }
        }
    }
}