1use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
12
/// Classification produced by `CodeBlockUtils::analyze_code_block_context`
/// for a single line, based on its indentation and its neighbouring lines.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum CodeBlockContext {
    /// The line is indented less than the minimum continuation indent and has
    /// at least one blank line directly before or after it.
    Standalone,
    /// The line is indented at least as far as the minimum continuation indent.
    Indented,
    /// The line is neither sufficiently indented nor bordered by blank lines;
    /// also used when the requested line index does not exist.
    Adjacent,
}
23
/// Data-less namespace type: the code-block helpers are exposed as associated
/// functions on it (none of them takes `self`).
pub struct CodeBlockUtils;
26
27impl CodeBlockUtils {
28 pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
38 let mut blocks = Vec::new();
39 let mut code_block_start: Option<usize> = None;
40
41 let options = Options::all();
43 let parser = Parser::new_ext(content, options).into_offset_iter();
44
45 for (event, range) in parser {
46 match event {
47 Event::Start(Tag::CodeBlock(_)) => {
48 code_block_start = Some(range.start);
50 }
51 Event::End(TagEnd::CodeBlock) => {
52 if let Some(start) = code_block_start.take() {
54 blocks.push((start, range.end));
55 }
56 }
57 _ => {}
58 }
59 }
60
61 if let Some(start) = code_block_start {
64 blocks.push((start, content.len()));
65 }
66
67 blocks.sort_by_key(|&(start, _)| start);
69 blocks
70 }
71
72 pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
74 blocks.iter().any(|&(start, end)| pos >= start && pos < end)
76 }
77
78 pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
80 blocks.iter().any(|&(start, end)| pos >= start && pos < end)
81 }
82
83 pub fn analyze_code_block_context(
86 lines: &[crate::lint_context::LineInfo],
87 line_idx: usize,
88 min_continuation_indent: usize,
89 ) -> CodeBlockContext {
90 if let Some(line_info) = lines.get(line_idx) {
91 if line_info.indent >= min_continuation_indent {
93 return CodeBlockContext::Indented;
94 }
95
96 let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
98
99 if prev_blanks > 0 || next_blanks > 0 {
102 return CodeBlockContext::Standalone;
103 }
104
105 CodeBlockContext::Adjacent
107 } else {
108 CodeBlockContext::Adjacent
110 }
111 }
112
113 fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
115 let mut prev_blanks = 0;
116 let mut next_blanks = 0;
117
118 for i in (0..line_idx).rev() {
120 if let Some(line) = lines.get(i) {
121 if line.is_blank {
122 prev_blanks += 1;
123 } else {
124 break;
125 }
126 } else {
127 break;
128 }
129 }
130
131 for i in (line_idx + 1)..lines.len() {
133 if let Some(line) = lines.get(i) {
134 if line.is_blank {
135 next_blanks += 1;
136 } else {
137 break;
138 }
139 } else {
140 break;
141 }
142 }
143
144 (prev_blanks, next_blanks)
145 }
146
147 pub fn calculate_min_continuation_indent(
150 content: &str,
151 lines: &[crate::lint_context::LineInfo],
152 current_line_idx: usize,
153 ) -> usize {
154 for i in (0..current_line_idx).rev() {
156 if let Some(line_info) = lines.get(i) {
157 if let Some(list_item) = &line_info.list_item {
158 return if list_item.is_ordered {
160 list_item.marker_column + list_item.marker.len() + 1 } else {
162 list_item.marker_column + 2 };
164 }
165
166 if line_info.heading.is_some() || Self::is_structural_separator(line_info.content(content)) {
168 break;
169 }
170 }
171 }
172
173 0 }
175
176 fn is_structural_separator(content: &str) -> bool {
178 let trimmed = content.trim();
179 trimmed.starts_with("---")
180 || trimmed.starts_with("***")
181 || trimmed.starts_with("___")
182 || trimmed.contains('|') || trimmed.starts_with(">") }
185}
186
#[cfg(test)]
mod tests {
    //! Tests for `CodeBlockUtils`.
    //!
    //! Fixtures that exercise indented code blocks use four leading spaces:
    //! CommonMark only treats a line as indented code when it is indented by
    //! at least four spaces (or a tab), so a narrower indent would silently
    //! turn the fixture into a paragraph or a list.

    use super::*;

    #[test]
    fn test_detect_fenced_code_blocks() {
        // Backtick fence
        let content = "Some text\n```\ncode here\n```\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let fenced_block = blocks
            .iter()
            .find(|(start, end)| end - start > 10 && content[*start..*end].contains("code here"));
        assert!(fenced_block.is_some());

        // Tilde fence
        let content = "Some text\n~~~\ncode here\n~~~\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(&content[blocks[0].0..blocks[0].1], "~~~\ncode here\n~~~");

        // Two separate blocks using different fence characters
        let content = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 2);
    }

    #[test]
    fn test_detect_code_blocks_with_language() {
        let content = "Text\n```rust\nfn main() {}\n```\nMore";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("fn main"));
        assert!(fenced.is_some());
    }

    #[test]
    fn test_unclosed_code_block() {
        // An unclosed fence extends to the end of the document.
        let content = "Text\n```\ncode here\nno closing fence";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].1, content.len());
    }

    #[test]
    fn test_indented_code_blocks() {
        // Four-space indentation after a blank line
        let content = "Paragraph\n\n    code line 1\n    code line 2\n\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("code line 1"));
        assert!(content[blocks[0].0..blocks[0].1].contains("code line 2"));

        // Tab indentation
        let content = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_code_requires_blank_line() {
        // An indented line cannot interrupt a paragraph, so this is not code.
        let content = "Paragraph\n    indented but not code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // With a separating blank line the same indentation is code.
        let content = "Paragraph\n\n    now it's code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_content_with_list_markers_is_code_block() {
        // Four-space indented lines are code even when they look like bullets.
        let content = "List:\n\n    - Item 1\n    - Item 2\n    * Item 3\n    + Item 4";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        // Same for lines that look like ordered list items.
        let content = "List:\n\n    1. First\n    2. Second";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_actual_list_items_not_code_blocks() {
        let content = "- Item 1\n- Item 2\n* Item 3";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // Two-space indent makes the second item a genuinely nested list item.
        let content = "- Item 1\n  - Nested item\n- Item 2";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_inline_code_spans_not_detected() {
        // Only block-level code is reported; inline spans are ignored.
        let content = "Text with `inline code` here";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        let content = "Text with ``code with ` backtick`` here";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        let content = "Has `code1` and `code2` spans";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_unclosed_code_span() {
        let content = "Text with `unclosed code span";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        let content = "Text with ``one style` different close";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_mixed_code_blocks_and_spans() {
        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        assert!(blocks.iter().any(|(s, e)| content[*s..*e].contains("block")));
        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span1`"));
        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span2`"));
    }

    #[test]
    fn test_is_in_code_block_or_span() {
        let blocks = vec![(10, 20), (30, 40), (50, 60)];

        // Positions inside a range
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 15));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 35));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 55));

        // Boundaries: start is inclusive, end is exclusive
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 10));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 20));

        // Positions outside every range
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 5));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 25));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 65));
    }

    #[test]
    fn test_empty_content() {
        let blocks = CodeBlockUtils::detect_code_blocks("");
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_code_block_at_start() {
        let content = "```\ncode\n```\nText after";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    #[test]
    fn test_code_block_at_end() {
        let content = "Text before\n```\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("code"));
        assert!(fenced.is_some());
    }

    #[test]
    fn test_nested_fence_markers() {
        // A longer outer fence keeps the inner, shorter fence as plain content.
        let content = "Text\n````\n```\nnested\n```\n````\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty());

        let outer = blocks.iter().find(|(s, e)| content[*s..*e].contains("nested"));
        assert!(outer.is_some());
    }

    #[test]
    fn test_indented_code_with_blank_lines() {
        // Blank lines between four-space indented chunks stay inside the code.
        let content = "Text\n\n    line1\n\n    line2\n\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty());

        let all_content: String = blocks.iter().map(|(s, e)| &content[*s..*e]).collect();
        assert!(all_content.contains("line1"));
    }

    #[test]
    fn test_code_span_with_spaces() {
        let content = "Text ` code with spaces ` more";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_fenced_block_with_info_string() {
        let content = "```rust,no_run,should_panic\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    #[test]
    fn test_indented_fences_not_code_blocks() {
        // A fence indented by fewer than four spaces still opens a fenced
        // code block, so exactly one block is reported here.
        let content = "Text\n ```\n code\n ```\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_backticks_in_info_string_not_code_block() {
        // A backtick fence whose info string contains backticks is not a
        // fence, so only the `bash` block counts.
        let content = "```something```\n\n```bash\n# comment\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("# comment"));
    }

    #[test]
    fn test_issue_175_reproduction() {
        let content = "```something```\n\n```bash\n# Have a parrot\necho \"🦜\"\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("Have a parrot"));
    }

    #[test]
    fn test_tilde_fence_allows_tildes_in_info_string() {
        // Unlike backtick fences, tilde fences may carry tildes in the info string.
        let content = "~~~abc~~~\ncode content\n~~~";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_nested_longer_fence_contains_shorter() {
        let content = "````\n```\nnested content\n```\n````";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("nested content"));
    }

    #[test]
    fn test_mixed_fence_types() {
        // A backtick line inside a tilde fence is content, not a closing fence.
        let content = "~~~\n```\nmixed content\n~~~";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("mixed content"));
    }
}