rumdl_lib/utils/
code_block_utils.rs1use crate::rules::blockquote_utils::BlockquoteUtils;
5use lazy_static::lazy_static;
6use regex::Regex;
7
/// Classification of a code block line relative to its surroundings, as produced by
/// `CodeBlockUtils::analyze_code_block_context`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockContext {
    /// The line is indented less than the required continuation indent and has at
    /// least one blank line directly before or after it.
    Standalone,
    /// The line's indent is at least the required continuation indent.
    Indented,
    /// Neither of the above applies; also used when the line index is out of range.
    Adjacent,
}
18
19lazy_static! {
20 static ref CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(```|~~~)").unwrap();
21 static ref CODE_SPAN_PATTERN: Regex = Regex::new(r"`+").unwrap();
22}
23
24pub struct CodeBlockUtils;
26
27impl CodeBlockUtils {
28 pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
30 let mut blocks = Vec::new();
31 let mut in_code_block = false;
32 let mut code_block_start = 0;
33 let mut opening_fence_char = ' ';
34 let mut opening_fence_len = 0;
35
36 let lines: Vec<&str> = content.lines().collect();
38 let mut line_positions = Vec::with_capacity(lines.len());
39 let mut pos = 0;
40 for line in &lines {
41 line_positions.push(pos);
42 pos += line.len() + 1; }
44
45 for (i, line) in lines.iter().enumerate() {
47 let line_start = line_positions[i];
48
49 let mut line_without_blockquote = line.to_string();
52 while BlockquoteUtils::is_blockquote(&line_without_blockquote) {
53 line_without_blockquote = BlockquoteUtils::extract_content(&line_without_blockquote);
54 }
55
56 let trimmed = line_without_blockquote.trim_start();
57
58 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
60 let fence_char = trimmed.chars().next().unwrap();
61 let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
62
63 if !in_code_block && fence_len >= 3 {
64 code_block_start = line_start;
66 in_code_block = true;
67 opening_fence_char = fence_char;
68 opening_fence_len = fence_len;
69 } else if in_code_block && fence_char == opening_fence_char && fence_len >= opening_fence_len {
70 let code_block_end = line_start + line.len();
72 blocks.push((code_block_start, code_block_end));
73 in_code_block = false;
74 opening_fence_char = ' ';
75 opening_fence_len = 0;
76 }
77 }
79 }
80
81 if in_code_block {
83 blocks.push((code_block_start, content.len()));
84 }
85
86 let mut in_indented_block = false;
90 let mut indented_block_start = 0;
91
92 for (line_idx, line) in lines.iter().enumerate() {
93 let line_start = if line_idx < line_positions.len() {
94 line_positions[line_idx]
95 } else {
96 0
97 };
98
99 let mut line_without_blockquote = line.to_string();
101 while BlockquoteUtils::is_blockquote(&line_without_blockquote) {
102 line_without_blockquote = BlockquoteUtils::extract_content(&line_without_blockquote);
103 }
104
105 let is_indented = line_without_blockquote.starts_with(" ") || line_without_blockquote.starts_with("\t");
107
108 let trimmed = line_without_blockquote.trim_start();
110 let is_list_item = trimmed.starts_with("- ")
111 || trimmed.starts_with("* ")
112 || trimmed.starts_with("+ ")
113 || trimmed.chars().next().is_some_and(|c| c.is_numeric())
114 && trimmed.chars().nth(1).is_some_and(|c| c == '.' || c == ')');
115
116 let prev_line_without_blockquote = if line_idx > 0 {
118 let mut prev = lines[line_idx - 1].to_string();
119 while BlockquoteUtils::is_blockquote(&prev) {
120 prev = BlockquoteUtils::extract_content(&prev);
121 }
122 prev
123 } else {
124 String::new()
125 };
126 let prev_blank = line_idx > 0 && prev_line_without_blockquote.trim().is_empty();
127
128 if is_indented && !line_without_blockquote.trim().is_empty() && !is_list_item {
129 if !in_indented_block {
130 if prev_blank {
132 in_indented_block = true;
133 indented_block_start = line_start;
134 }
135 }
137 } else if in_indented_block {
138 let block_end = if line_idx > 0 && line_idx - 1 < line_positions.len() {
140 line_positions[line_idx - 1] + lines[line_idx - 1].len()
141 } else {
142 line_start
143 };
144 blocks.push((indented_block_start, block_end));
145 in_indented_block = false;
146 }
147 }
148
149 if in_indented_block {
151 blocks.push((indented_block_start, content.len()));
152 }
153
154 blocks.sort_by(|a, b| a.0.cmp(&b.0));
158 blocks
159 }
160
161 pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
163 blocks.iter().any(|&(start, end)| pos >= start && pos < end)
165 }
166
167 pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
169 blocks.iter().any(|&(start, end)| pos >= start && pos < end)
170 }
171
172 pub fn analyze_code_block_context(
175 lines: &[crate::lint_context::LineInfo],
176 line_idx: usize,
177 min_continuation_indent: usize,
178 ) -> CodeBlockContext {
179 if let Some(line_info) = lines.get(line_idx) {
180 if line_info.indent >= min_continuation_indent {
182 return CodeBlockContext::Indented;
183 }
184
185 let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
187
188 if prev_blanks > 0 || next_blanks > 0 {
191 return CodeBlockContext::Standalone;
192 }
193
194 CodeBlockContext::Adjacent
196 } else {
197 CodeBlockContext::Adjacent
199 }
200 }
201
202 fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
204 let mut prev_blanks = 0;
205 let mut next_blanks = 0;
206
207 for i in (0..line_idx).rev() {
209 if let Some(line) = lines.get(i) {
210 if line.is_blank {
211 prev_blanks += 1;
212 } else {
213 break;
214 }
215 } else {
216 break;
217 }
218 }
219
220 for i in (line_idx + 1)..lines.len() {
222 if let Some(line) = lines.get(i) {
223 if line.is_blank {
224 next_blanks += 1;
225 } else {
226 break;
227 }
228 } else {
229 break;
230 }
231 }
232
233 (prev_blanks, next_blanks)
234 }
235
236 pub fn calculate_min_continuation_indent(
239 lines: &[crate::lint_context::LineInfo],
240 current_line_idx: usize,
241 ) -> usize {
242 for i in (0..current_line_idx).rev() {
244 if let Some(line_info) = lines.get(i) {
245 if let Some(list_item) = &line_info.list_item {
246 return if list_item.is_ordered {
248 list_item.marker_column + list_item.marker.len() + 1 } else {
250 list_item.marker_column + 2 };
252 }
253
254 if line_info.heading.is_some() || Self::is_structural_separator(&line_info.content) {
256 break;
257 }
258 }
259 }
260
261 0 }
263
264 fn is_structural_separator(content: &str) -> bool {
266 let trimmed = content.trim();
267 trimmed.starts_with("---")
268 || trimmed.starts_with("***")
269 || trimmed.starts_with("___")
270 || trimmed.contains('|') || trimmed.starts_with(">") }
273}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the slice of `content` covered by `block`.
    fn text_of<'a>(content: &'a str, block: &(usize, usize)) -> &'a str {
        &content[block.0..block.1]
    }

    /// True when the text of at least one block contains `needle`.
    fn any_block_contains(content: &str, blocks: &[(usize, usize)], needle: &str) -> bool {
        blocks.iter().any(|block| text_of(content, block).contains(needle))
    }

    #[test]
    fn test_detect_fenced_code_blocks() {
        // Backtick fence.
        let content = "Some text\n```\ncode here\n```\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(blocks
            .iter()
            .any(|block| block.1 - block.0 > 10 && text_of(content, block).contains("code here")));

        // Tilde fence: the range covers both fence lines.
        let content = "Some text\n~~~\ncode here\n~~~\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(text_of(content, &blocks[0]), "~~~\ncode here\n~~~");

        // Two separate blocks with different fence characters.
        let content = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 2);
    }

    #[test]
    fn test_detect_code_blocks_with_language() {
        let content = "Text\n```rust\nfn main() {}\n```\nMore";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(any_block_contains(content, &blocks, "fn main"));
    }

    #[test]
    fn test_unclosed_code_block() {
        // A fence that is never closed runs to the end of the input.
        let content = "Text\n```\ncode here\nno closing fence";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].1, content.len());
    }

    #[test]
    fn test_indented_code_blocks() {
        // Space-indented lines after a blank line.
        let content = "Paragraph\n\n code line 1\n code line 2\n\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        let covered = text_of(content, &blocks[0]);
        assert!(covered.contains("code line 1"));
        assert!(covered.contains("code line 2"));

        // Tab-indented lines after a blank line.
        let content = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_code_requires_blank_line() {
        // No blank line before the indented line: not a block.
        let content = "Paragraph\n indented but not code\nMore text";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 0);

        // Blank line before the indented line: a block.
        let content = "Paragraph\n\n now it's code\nMore text";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 1);
    }

    #[test]
    fn test_list_items_not_code_blocks() {
        // Bullet markers.
        let content = "List:\n\n - Item 1\n - Item 2\n * Item 3\n + Item 4";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 0);

        // Ordered markers with `.` and `)` delimiters.
        let content = "List:\n\n 1. First\n 2. Second\n 1) Also first";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 0);
    }

    #[test]
    fn test_inline_code_spans_not_detected() {
        let content = "Text with `inline code` here";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 0);

        let content = "Text with ``code with ` backtick`` here";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 0);

        let content = "Has `code1` and `code2` spans";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 0);
    }

    #[test]
    fn test_unclosed_code_span() {
        let content = "Text with `unclosed code span";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 0);

        let content = "Text with ``one style` different close";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 0);
    }

    #[test]
    fn test_mixed_code_blocks_and_spans() {
        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);

        // Only the fenced block is reported; the inline spans are not.
        assert!(any_block_contains(content, &blocks, "block"));
        assert!(blocks.iter().all(|block| text_of(content, block) != "`span1`"));
        assert!(blocks.iter().all(|block| text_of(content, block) != "`span2`"));
    }

    #[test]
    fn test_is_in_code_block_or_span() {
        let blocks = vec![(10, 20), (30, 40), (50, 60)];

        // Interior positions, plus a start boundary (inclusive).
        for pos in [15, 35, 55, 10] {
            assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, pos));
        }

        // An end boundary (exclusive), plus positions outside every range.
        for pos in [20, 5, 25, 65] {
            assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, pos));
        }
    }

    #[test]
    fn test_empty_content() {
        assert_eq!(CodeBlockUtils::detect_code_blocks("").len(), 0);
    }

    #[test]
    fn test_code_block_at_start() {
        let content = "```\ncode\n```\nText after";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    #[test]
    fn test_code_block_at_end() {
        let content = "Text before\n```\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(any_block_contains(content, &blocks, "code"));
    }

    #[test]
    fn test_nested_fence_markers() {
        // A four-backtick fence is not closed by the three-backtick fences inside it.
        let content = "Text\n````\n```\nnested\n```\n````\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty());
        assert!(any_block_contains(content, &blocks, "nested"));
    }

    #[test]
    fn test_indented_code_with_blank_lines() {
        let content = "Text\n\n line1\n\n line2\n\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty());

        let all_content: String = blocks.iter().map(|block| text_of(content, block)).collect();
        assert!(all_content.contains("line1"));
    }

    #[test]
    fn test_code_span_with_spaces() {
        let content = "Text ` code with spaces ` more";
        assert_eq!(CodeBlockUtils::detect_code_blocks(content).len(), 0);
    }

    #[test]
    fn test_fenced_block_with_info_string() {
        let content = "```rust,no_run,should_panic\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    #[test]
    fn test_indented_fences_not_code_blocks() {
        // Despite the test name, the assertion expects the indented fence pair to
        // be detected as exactly one block.
        let content = "Text\n ```\n code\n ```\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }
}