rumdl_lib/utils/
code_block_utils.rs1use lazy_static::lazy_static;
5use regex::Regex;
6
/// Classification of a line relative to its surroundings, as produced by
/// `CodeBlockUtils::analyze_code_block_context`.
///
/// The type is a plain tag with no payload, so it is `Copy` and `Hash` in
/// addition to the comparison traits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CodeBlockContext {
    /// The line is indented less than the minimum continuation indent and has
    /// at least one blank line directly before or after it.
    Standalone,
    /// The line's indent is at least the minimum continuation indent.
    Indented,
    /// Fallback: the line is under-indented with no blank line on either
    /// side, or the requested line index does not exist.
    Adjacent,
}
17
18lazy_static! {
19 static ref CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(```|~~~)").unwrap();
20 static ref CODE_SPAN_PATTERN: Regex = Regex::new(r"`+").unwrap();
21}
22
/// Field-less namespace type; the code-block helpers in this file are
/// associated functions on it and never take `self`.
pub struct CodeBlockUtils;
25
26impl CodeBlockUtils {
/// Finds fenced code blocks, indented code blocks and inline code spans in
/// `content`.
///
/// Returns half-open byte ranges `(start, end)` into `content`, sorted by
/// `start`. Three independent passes contribute ranges:
///
/// 1. **Fenced blocks** – a line whose first non-whitespace characters are at
///    least three backticks or tildes opens a block; it is closed by a later
///    line starting with the same character repeated at least as many times.
///    The range runs from the start of the opening line to the end of the
///    closing line (line terminator excluded). An unclosed block extends to
///    `content.len()`.
/// 2. **Indented blocks** – consecutive non-blank lines that start with at
///    least four spaces or a tab, do not look like list items, and whose first
///    line directly follows a blank line.
/// 3. **Code spans** – a run of backticks closed by the next unescaped run of
///    exactly the same length. Runs of three or more backticks at the very
///    start of a line are treated as fence markers and skipped; a run directly
///    preceded by a backslash is treated as escaped.
///
/// The passes do not know about each other, so ranges may overlap (for
/// example a span reported inside a fenced block).
///
/// Offsets are exact for both `\n` and `\r\n` line endings.
pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
    /// Minimum number of leading spaces that makes a line "indented code".
    const MIN_CODE_INDENT: usize = 4;

    /// Returns the half-open byte range of the first run of backticks that
    /// starts at or after `from`.
    fn next_backtick_run(bytes: &[u8], from: usize) -> Option<(usize, usize)> {
        let start = from + bytes.get(from..)?.iter().position(|&b| b == b'`')?;
        let len = bytes[start..].iter().take_while(|&&b| b == b'`').count();
        Some((start, start + len))
    }

    /// Heuristic list-marker test on a line with leading whitespace removed:
    /// a bullet followed by a space, or a numeric character followed by `.`
    /// or `)`.
    fn looks_like_list_item(trimmed: &str) -> bool {
        let bullet = ["- ", "* ", "+ "]
            .iter()
            .any(|&marker| trimmed.starts_with(marker));
        let mut chars = trimmed.chars();
        let ordered = chars.next().is_some_and(char::is_numeric)
            && chars.next().is_some_and(|c| matches!(c, '.' | ')'));
        bullet || ordered
    }

    // Pair every line with its real byte offset. Walking the terminators
    // (instead of assuming one byte per line ending) keeps offsets correct
    // when the document uses CRLF.
    let mut lines: Vec<(usize, &str)> = Vec::new();
    let mut offset = 0;
    for raw in content.split_inclusive('\n') {
        let text = match raw.strip_suffix('\n') {
            Some(body) => body.strip_suffix('\r').unwrap_or(body),
            None => raw,
        };
        lines.push((offset, text));
        offset += raw.len();
    }

    let mut blocks = Vec::new();

    // Pass 1: fenced blocks. `open_fence` holds (start offset, fence char,
    // fence length) while a block is open.
    let mut open_fence: Option<(usize, char, usize)> = None;
    for &(line_start, line) in &lines {
        let trimmed = line.trim_start();
        let fence_char = match trimmed.chars().next() {
            Some(c) if c == '`' || c == '~' => c,
            _ => continue,
        };
        let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
        if fence_len < 3 {
            continue;
        }
        match open_fence {
            None => open_fence = Some((line_start, fence_char, fence_len)),
            Some((start, open_char, open_len)) if open_char == fence_char && fence_len >= open_len => {
                blocks.push((start, line_start + line.len()));
                open_fence = None;
            }
            // A different or shorter fence inside an open block is content.
            Some(_) => {}
        }
    }
    if let Some((start, _, _)) = open_fence {
        blocks.push((start, content.len()));
    }

    // Pass 2: indented blocks.
    let mut indented_start: Option<usize> = None;
    for (idx, &(line_start, line)) in lines.iter().enumerate() {
        let leading_spaces = line.bytes().take_while(|&b| b == b' ').count();
        let is_indented = leading_spaces >= MIN_CODE_INDENT || line.starts_with('\t');
        let is_code_line =
            is_indented && !line.trim().is_empty() && !looks_like_list_item(line.trim_start());

        if is_code_line {
            // A block may only begin directly after a blank line.
            let prev_blank = idx > 0 && lines[idx - 1].1.trim().is_empty();
            if indented_start.is_none() && prev_blank {
                indented_start = Some(line_start);
            }
        } else if let Some(start) = indented_start.take() {
            // The block ended on the previous line; `idx >= 1` because a
            // block can only have started on an earlier line.
            let (prev_start, prev_line) = lines[idx - 1];
            blocks.push((start, prev_start + prev_line.len()));
        }
    }
    if let Some(start) = indented_start {
        blocks.push((start, content.len()));
    }

    // Pass 3: inline code spans. Backticks are ASCII, so scanning bytes never
    // lands inside a multi-byte character.
    let bytes = content.as_bytes();
    let mut cursor = 0;
    while let Some((open_start, open_end)) = next_backtick_run(bytes, cursor) {
        cursor = open_end;
        let run_len = open_end - open_start;

        let at_line_start = open_start == 0 || bytes[open_start - 1] == b'\n';
        let is_escaped = open_start > 0 && bytes[open_start - 1] == b'\\';
        if (run_len >= 3 && at_line_start) || is_escaped {
            continue;
        }

        // The span closes at the next unescaped run of exactly `run_len`
        // backticks; longer or shorter runs are part of the span's content.
        let mut search_from = open_end;
        while let Some((close_start, close_end)) = next_backtick_run(bytes, search_from) {
            search_from = close_end;
            // `close_start >= 1` because it lies after the opening run.
            let close_escaped = bytes[close_start - 1] == b'\\';
            if close_end - close_start == run_len && !close_escaped {
                blocks.push((open_start, close_end));
                cursor = close_end;
                break;
            }
        }
    }

    // Stable sort: ranges with equal starts keep their pass order.
    blocks.sort_by_key(|&(start, _)| start);
    blocks
}
199
/// Reports whether byte offset `pos` falls inside any range in `blocks`.
///
/// Each range is half-open: its `start` is inside, its `end` is not.
pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
    for &(lo, hi) in blocks {
        if (lo..hi).contains(&pos) {
            return true;
        }
    }
    false
}
204
205 pub fn analyze_code_block_context(
208 lines: &[crate::lint_context::LineInfo],
209 line_idx: usize,
210 min_continuation_indent: usize,
211 ) -> CodeBlockContext {
212 if let Some(line_info) = lines.get(line_idx) {
213 if line_info.indent >= min_continuation_indent {
215 return CodeBlockContext::Indented;
216 }
217
218 let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
220
221 if prev_blanks > 0 || next_blanks > 0 {
224 return CodeBlockContext::Standalone;
225 }
226
227 CodeBlockContext::Adjacent
229 } else {
230 CodeBlockContext::Adjacent
232 }
233 }
234
235 fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
237 let mut prev_blanks = 0;
238 let mut next_blanks = 0;
239
240 for i in (0..line_idx).rev() {
242 if let Some(line) = lines.get(i) {
243 if line.is_blank {
244 prev_blanks += 1;
245 } else {
246 break;
247 }
248 } else {
249 break;
250 }
251 }
252
253 for i in (line_idx + 1)..lines.len() {
255 if let Some(line) = lines.get(i) {
256 if line.is_blank {
257 next_blanks += 1;
258 } else {
259 break;
260 }
261 } else {
262 break;
263 }
264 }
265
266 (prev_blanks, next_blanks)
267 }
268
269 pub fn calculate_min_continuation_indent(
272 lines: &[crate::lint_context::LineInfo],
273 current_line_idx: usize,
274 ) -> usize {
275 for i in (0..current_line_idx).rev() {
277 if let Some(line_info) = lines.get(i) {
278 if let Some(list_item) = &line_info.list_item {
279 return if list_item.is_ordered {
281 list_item.marker_column + list_item.marker.len() + 1 } else {
283 list_item.marker_column + 2 };
285 }
286
287 if line_info.heading.is_some() || Self::is_structural_separator(&line_info.content) {
289 break;
290 }
291 }
292 }
293
294 0 }
296
/// Reports whether a line, after trimming surrounding whitespace, starts
/// with `---`, `***`, `___` or `>`, or contains a `|` anywhere.
fn is_structural_separator(content: &str) -> bool {
    const LEADING_MARKERS: [&str; 4] = ["---", "***", "___", ">"];

    let line = content.trim();
    if line.contains('|') {
        return true;
    }
    LEADING_MARKERS.iter().any(|&marker| line.starts_with(marker))
}
306}
307
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs detection and returns the text of every reported range, in the
    /// order the ranges are returned.
    fn detected(content: &str) -> Vec<&str> {
        CodeBlockUtils::detect_code_blocks(content)
            .into_iter()
            .map(|(start, end)| &content[start..end])
            .collect()
    }

    #[test]
    fn test_detect_fenced_code_blocks() {
        // Backtick fence.
        assert_eq!(
            detected("Some text\n```\ncode here\n```\nMore text"),
            vec!["```\ncode here\n```"]
        );

        // Tilde fence.
        assert_eq!(
            detected("Some text\n~~~\ncode here\n~~~\nMore text"),
            vec!["~~~\ncode here\n~~~"]
        );

        // Two separate blocks with different fence characters.
        assert_eq!(
            detected("Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd"),
            vec!["```\ncode1\n```", "~~~\ncode2\n~~~"]
        );
    }

    #[test]
    fn test_detect_code_blocks_with_language() {
        assert_eq!(
            detected("Text\n```rust\nfn main() {}\n```\nMore"),
            vec!["```rust\nfn main() {}\n```"]
        );
    }

    #[test]
    fn test_unclosed_code_block() {
        let content = "Text\n```\ncode here\nno closing fence";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        // An unclosed fence runs to the end of the document.
        assert_eq!(blocks[0].1, content.len());
    }

    #[test]
    fn test_indented_code_blocks() {
        // Four-space indentation after a blank line.
        assert_eq!(
            detected("Paragraph\n\n    code line 1\n    code line 2\n\nMore text"),
            vec!["    code line 1\n    code line 2"]
        );

        // Tab indentation after a blank line.
        assert_eq!(
            detected("Paragraph\n\n\tcode with tab\n\tanother line\n\nText"),
            vec!["\tcode with tab\n\tanother line"]
        );
    }

    #[test]
    fn test_indented_code_requires_blank_line() {
        // Indented text directly under a paragraph is a continuation, not code.
        assert!(detected("Paragraph\n    indented but not code\nMore text").is_empty());

        // The same line after a blank line is code.
        assert_eq!(
            detected("Paragraph\n\n    now it's code\nMore text"),
            vec!["    now it's code"]
        );
    }

    #[test]
    fn test_list_items_not_code_blocks() {
        // Indented bullet items.
        assert!(detected("List:\n\n    - Item 1\n    - Item 2\n    * Item 3\n    + Item 4").is_empty());

        // Indented ordered items with both `.` and `)` delimiters.
        assert!(detected("List:\n\n    1. First\n    2. Second\n    1) Also first").is_empty());
    }

    #[test]
    fn test_inline_code_spans() {
        assert_eq!(detected("Text with `inline code` here"), vec!["`inline code`"]);

        // A double-backtick span may contain a single backtick.
        assert_eq!(
            detected("Text with ``code with ` backtick`` here"),
            vec!["``code with ` backtick``"]
        );

        assert_eq!(detected("Has `code1` and `code2` spans"), vec!["`code1`", "`code2`"]);
    }

    #[test]
    fn test_unclosed_code_span() {
        assert!(detected("Text with `unclosed code span").is_empty());

        // Opening and closing runs of different lengths never pair up.
        assert!(detected("Text with ``one style` different close").is_empty());
    }

    #[test]
    fn test_mixed_code_blocks_and_spans() {
        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
        let blocks = CodeBlockUtils::detect_code_blocks(content);

        assert_eq!(
            detected(content),
            vec!["`span1`", "```\nblock\n```", "`span2`"]
        );

        // Ranges come back ordered by start offset.
        assert!(blocks.windows(2).all(|pair| pair[0].0 <= pair[1].0));
    }

    #[test]
    fn test_is_in_code_block_or_span() {
        let blocks = vec![(10, 20), (30, 40), (50, 60)];

        // Inside each range.
        for pos in [15, 35, 55] {
            assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, pos));
        }

        // The start is inclusive, the end is exclusive.
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 10));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 20));

        // Before, between and after the ranges.
        for pos in [5, 25, 65] {
            assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, pos));
        }
    }

    #[test]
    fn test_empty_content() {
        assert!(CodeBlockUtils::detect_code_blocks("").is_empty());
    }

    #[test]
    fn test_code_block_at_start() {
        let content = "```\ncode\n```\nText after";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
        assert_eq!(detected(content), vec!["```\ncode\n```"]);
    }

    #[test]
    fn test_code_block_at_end() {
        assert_eq!(detected("Text before\n```\ncode\n```"), vec!["```\ncode\n```"]);
    }

    #[test]
    fn test_nested_fence_markers() {
        // The shorter inner fences cannot close the four-backtick outer fence.
        assert_eq!(
            detected("Text\n````\n```\nnested\n```\n````\nAfter"),
            vec!["````\n```\nnested\n```\n````"]
        );
    }

    #[test]
    fn test_indented_code_with_blank_lines() {
        // A blank line ends an indented block; the next indented line starts
        // a new one.
        assert_eq!(
            detected("Text\n\n    line1\n\n    line2\n\nAfter"),
            vec!["    line1", "    line2"]
        );
    }

    #[test]
    fn test_code_span_with_spaces() {
        assert_eq!(
            detected("Text ` code with spaces ` more"),
            vec!["` code with spaces `"]
        );
    }

    #[test]
    fn test_fenced_block_with_info_string() {
        let content = "```rust,no_run,should_panic\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
        assert_eq!(blocks[0].1, content.len());
    }

    #[test]
    fn test_indented_fences_not_code_blocks() {
        // Current behaviour: the fence pass ignores leading whitespace, and
        // the span pass additionally pairs the two indented backtick runs, so
        // two overlapping ranges are reported.
        assert_eq!(
            detected("Text\n    ```\n    code\n    ```\nAfter"),
            vec!["    ```\n    code\n    ```", "```\n    code\n    ```"]
        );
    }
}