rumdl_lib/utils/
code_block_utils.rs1use crate::rules::blockquote_utils::BlockquoteUtils;
5
/// Classification of a code-block line relative to its surroundings, as
/// produced by `CodeBlockUtils::analyze_code_block_context`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum CodeBlockContext {
    /// The line is not indented to the continuation threshold and has at
    /// least one blank line directly before or after it.
    Standalone,
    /// The line's indent is at least the minimum continuation indent passed
    /// to the analysis.
    Indented,
    /// Neither of the above: no blank line on either side (also the result
    /// when the requested line index is out of range).
    Adjacent,
}
16
/// Stateless namespace type: the code-block helpers are associated functions
/// on this unit struct.
pub struct CodeBlockUtils;
19
20impl CodeBlockUtils {
21 pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
23 let mut blocks = Vec::new();
24 let mut in_code_block = false;
25 let mut code_block_start = 0;
26 let mut opening_fence_char = ' ';
27 let mut opening_fence_len = 0;
28
29 let lines: Vec<&str> = content.lines().collect();
31 let mut line_positions = Vec::with_capacity(lines.len());
32 let mut pos = 0;
33 for line in &lines {
34 line_positions.push(pos);
35 pos += line.len() + 1; }
37
38 for (i, line) in lines.iter().enumerate() {
40 let line_start = line_positions[i];
41
42 let mut line_without_blockquote = line.to_string();
45 while BlockquoteUtils::is_blockquote(&line_without_blockquote) {
46 line_without_blockquote = BlockquoteUtils::extract_content(&line_without_blockquote);
47 }
48
49 let trimmed = line_without_blockquote.trim_start();
50
51 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
53 let fence_char = trimmed.chars().next().unwrap();
54 let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
55
56 if !in_code_block && fence_len >= 3 {
57 code_block_start = line_start;
59 in_code_block = true;
60 opening_fence_char = fence_char;
61 opening_fence_len = fence_len;
62 } else if in_code_block && fence_char == opening_fence_char && fence_len >= opening_fence_len {
63 let code_block_end = line_start + line.len();
65 blocks.push((code_block_start, code_block_end));
66 in_code_block = false;
67 opening_fence_char = ' ';
68 opening_fence_len = 0;
69 }
70 }
72 }
73
74 if in_code_block {
76 blocks.push((code_block_start, content.len()));
77 }
78
79 let mut in_indented_block = false;
88 let mut indented_block_start = 0;
89 let mut in_list_context = false;
90 let mut list_continuation_indent: usize = 0;
91
92 for (line_idx, line) in lines.iter().enumerate() {
93 let line_start = if line_idx < line_positions.len() {
94 line_positions[line_idx]
95 } else {
96 0
97 };
98
99 let mut line_without_blockquote = line.to_string();
101 while BlockquoteUtils::is_blockquote(&line_without_blockquote) {
102 line_without_blockquote = BlockquoteUtils::extract_content(&line_without_blockquote);
103 }
104
105 let indent_level = line_without_blockquote.len() - line_without_blockquote.trim_start().len();
107 let is_indented = line_without_blockquote.starts_with(" ") || line_without_blockquote.starts_with("\t");
108
109 let trimmed = line_without_blockquote.trim_start();
111
112 let is_ordered_list = {
115 let mut chars = trimmed.chars();
116 let first_char = chars.next();
117 if !first_char.is_some_and(|c| c.is_numeric()) {
118 false
119 } else {
120 let delimiter_char_pos = trimmed.chars().position(|c| c == '.' || c == ')');
122 match delimiter_char_pos {
123 Some(char_pos) if char_pos > 0 => {
124 let byte_pos = trimmed.char_indices().nth(char_pos).map(|(i, _)| i);
126 if let Some(byte_pos) = byte_pos {
127 let all_digits = trimmed[..byte_pos].chars().all(|c| c.is_numeric());
129 let has_space =
131 trimmed.chars().nth(char_pos + 1).is_some_and(|c| c == ' ' || c == '\t');
132 all_digits && has_space
133 } else {
134 false
135 }
136 }
137 _ => false,
138 }
139 }
140 };
141
142 let is_list_item =
143 trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") || is_ordered_list;
144
145 let prev_line_without_blockquote = if line_idx > 0 {
147 let mut prev = lines[line_idx - 1].to_string();
148 while BlockquoteUtils::is_blockquote(&prev) {
149 prev = BlockquoteUtils::extract_content(&prev);
150 }
151 prev
152 } else {
153 String::new()
154 };
155 let prev_blank = line_idx > 0 && prev_line_without_blockquote.trim().is_empty();
156
157 if is_list_item {
159 in_list_context = true;
161
162 let marker_column = indent_level;
168 let marker_width =
169 if trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") {
170 1 } else {
172 trimmed.chars().take_while(|c| c.is_numeric()).count() + 1
174 };
175
176 let after_marker = &trimmed[marker_width..];
179 let spaces_after_marker = after_marker.chars().take_while(|c| *c == ' ' || *c == '\t').count();
180
181 list_continuation_indent = marker_column + marker_width + spaces_after_marker;
184 } else if in_list_context
185 && !line_without_blockquote.trim().is_empty()
186 && indent_level < list_continuation_indent
187 {
188 in_list_context = false;
190 list_continuation_indent = 0;
191 }
192
193 let is_list_continuation_paragraph = in_list_context
199 && indent_level >= list_continuation_indent
200 && indent_level < (list_continuation_indent + 4);
201
202 let is_code_block_in_list = in_list_context && indent_level >= (list_continuation_indent + 4);
203
204 if is_indented && !line_without_blockquote.trim().is_empty() && !is_list_item {
206 if is_code_block_in_list {
207 if !in_indented_block && prev_blank {
210 in_indented_block = true;
211 indented_block_start = line_start;
212 }
213 } else if !is_list_continuation_paragraph {
214 if !in_indented_block && prev_blank {
216 in_indented_block = true;
217 indented_block_start = line_start;
218 }
219 }
220 } else if in_indented_block {
222 let block_end = if line_idx > 0 && line_idx - 1 < line_positions.len() {
224 line_positions[line_idx - 1] + lines[line_idx - 1].len()
225 } else {
226 line_start
227 };
228 blocks.push((indented_block_start, block_end));
229 in_indented_block = false;
230 }
231 }
232
233 if in_indented_block {
235 blocks.push((indented_block_start, content.len()));
236 }
237
238 blocks.sort_by(|a, b| a.0.cmp(&b.0));
242 blocks
243 }
244
/// Reports whether byte offset `pos` falls inside any of `blocks`.
///
/// Each entry is treated as a half-open range: `start` is inside, `end` is not.
pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
    for &(start, end) in blocks {
        if start <= pos && pos < end {
            return true;
        }
    }
    false
}
250
/// Returns `true` when `pos` lies within at least one `(start, end)` entry of
/// `blocks`, with `start` inclusive and `end` exclusive.
pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
    blocks.iter().any(|range| (range.0..range.1).contains(&pos))
}
255
256 pub fn analyze_code_block_context(
259 lines: &[crate::lint_context::LineInfo],
260 line_idx: usize,
261 min_continuation_indent: usize,
262 ) -> CodeBlockContext {
263 if let Some(line_info) = lines.get(line_idx) {
264 if line_info.indent >= min_continuation_indent {
266 return CodeBlockContext::Indented;
267 }
268
269 let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
271
272 if prev_blanks > 0 || next_blanks > 0 {
275 return CodeBlockContext::Standalone;
276 }
277
278 CodeBlockContext::Adjacent
280 } else {
281 CodeBlockContext::Adjacent
283 }
284 }
285
286 fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
288 let mut prev_blanks = 0;
289 let mut next_blanks = 0;
290
291 for i in (0..line_idx).rev() {
293 if let Some(line) = lines.get(i) {
294 if line.is_blank {
295 prev_blanks += 1;
296 } else {
297 break;
298 }
299 } else {
300 break;
301 }
302 }
303
304 for i in (line_idx + 1)..lines.len() {
306 if let Some(line) = lines.get(i) {
307 if line.is_blank {
308 next_blanks += 1;
309 } else {
310 break;
311 }
312 } else {
313 break;
314 }
315 }
316
317 (prev_blanks, next_blanks)
318 }
319
320 pub fn calculate_min_continuation_indent(
323 content: &str,
324 lines: &[crate::lint_context::LineInfo],
325 current_line_idx: usize,
326 ) -> usize {
327 for i in (0..current_line_idx).rev() {
329 if let Some(line_info) = lines.get(i) {
330 if let Some(list_item) = &line_info.list_item {
331 return if list_item.is_ordered {
333 list_item.marker_column + list_item.marker.len() + 1 } else {
335 list_item.marker_column + 2 };
337 }
338
339 if line_info.heading.is_some() || Self::is_structural_separator(line_info.content(content)) {
341 break;
342 }
343 }
344 }
345
346 0 }
348
/// Returns `true` when the trimmed line starts with `---`, `***`, `___` or
/// `>`, or contains a `|` anywhere.
fn is_structural_separator(content: &str) -> bool {
    const LEADING_MARKERS: [&str; 4] = ["---", "***", "___", ">"];

    let text = content.trim();
    text.contains('|') || LEADING_MARKERS.iter().any(|&marker| text.starts_with(marker))
}
358}
359
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs block detection on a test document.
    fn detect(content: &str) -> Vec<(usize, usize)> {
        CodeBlockUtils::detect_code_blocks(content)
    }

    /// Returns the text covered by a detected range.
    fn text_of(content: &str, range: (usize, usize)) -> &str {
        &content[range.0..range.1]
    }

    /// True when at least one detected range contains `needle`.
    fn any_block_contains(content: &str, blocks: &[(usize, usize)], needle: &str) -> bool {
        blocks.iter().any(|&range| text_of(content, range).contains(needle))
    }

    #[test]
    fn test_detect_fenced_code_blocks() {
        // Backtick fence.
        let backticks = "Some text\n```\ncode here\n```\nMore text";
        let found = detect(backticks);
        assert_eq!(found.len(), 1);
        assert!(
            found
                .iter()
                .any(|&(start, end)| end - start > 10 && backticks[start..end].contains("code here"))
        );

        // Tilde fence: the range covers exactly the fence lines and body.
        let tildes = "Some text\n~~~\ncode here\n~~~\nMore text";
        let found = detect(tildes);
        assert_eq!(found.len(), 1);
        assert_eq!(text_of(tildes, found[0]), "~~~\ncode here\n~~~");

        // Two separate blocks in one document.
        let both = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
        assert_eq!(detect(both).len(), 2);
    }

    #[test]
    fn test_detect_code_blocks_with_language() {
        let content = "Text\n```rust\nfn main() {}\n```\nMore";
        let found = detect(content);
        assert_eq!(found.len(), 1);
        assert!(any_block_contains(content, &found, "fn main"));
    }

    #[test]
    fn test_unclosed_code_block() {
        let content = "Text\n```\ncode here\nno closing fence";
        let found = detect(content);
        assert_eq!(found.len(), 1);
        // An unclosed fence runs to the end of the document.
        assert_eq!(found[0].1, content.len());
    }

    #[test]
    fn test_indented_code_blocks() {
        let spaces = "Paragraph\n\n code line 1\n code line 2\n\nMore text";
        let found = detect(spaces);
        assert_eq!(found.len(), 1);
        let body = text_of(spaces, found[0]);
        assert!(body.contains("code line 1"));
        assert!(body.contains("code line 2"));

        let tabs = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
        assert_eq!(detect(tabs).len(), 1);
    }

    #[test]
    fn test_indented_code_requires_blank_line() {
        let without_blank = "Paragraph\n indented but not code\nMore text";
        assert_eq!(detect(without_blank).len(), 0);

        let with_blank = "Paragraph\n\n now it's code\nMore text";
        assert_eq!(detect(with_blank).len(), 1);
    }

    #[test]
    fn test_list_items_not_code_blocks() {
        let documents = [
            "List:\n\n - Item 1\n - Item 2\n * Item 3\n + Item 4",
            "List:\n\n 1. First\n 2. Second\n 1) Also first",
        ];
        for content in documents {
            assert_eq!(detect(content).len(), 0);
        }
    }

    #[test]
    fn test_inline_code_spans_not_detected() {
        let documents = [
            "Text with `inline code` here",
            "Text with ``code with ` backtick`` here",
            "Has `code1` and `code2` spans",
        ];
        for content in documents {
            assert_eq!(detect(content).len(), 0);
        }
    }

    #[test]
    fn test_unclosed_code_span() {
        let documents = [
            "Text with `unclosed code span",
            "Text with ``one style` different close",
        ];
        for content in documents {
            assert_eq!(detect(content).len(), 0);
        }
    }

    #[test]
    fn test_mixed_code_blocks_and_spans() {
        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
        let found = detect(content);
        assert_eq!(found.len(), 1);

        assert!(any_block_contains(content, &found, "block"));
        // Inline spans must never be reported as blocks.
        assert!(found.iter().all(|&range| text_of(content, range) != "`span1`"));
        assert!(found.iter().all(|&range| text_of(content, range) != "`span2`"));
    }

    #[test]
    fn test_is_in_code_block_or_span() {
        let blocks = vec![(10, 20), (30, 40), (50, 60)];

        // Interior positions and the inclusive start boundary.
        for inside in [15, 35, 55, 10] {
            assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, inside));
        }

        // The exclusive end boundary and positions between or outside ranges.
        for outside in [20, 5, 25, 65] {
            assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, outside));
        }
    }

    #[test]
    fn test_empty_content() {
        assert_eq!(detect("").len(), 0);
    }

    #[test]
    fn test_code_block_at_start() {
        let content = "```\ncode\n```\nText after";
        let found = detect(content);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].0, 0);
    }

    #[test]
    fn test_code_block_at_end() {
        let content = "Text before\n```\ncode\n```";
        let found = detect(content);
        assert_eq!(found.len(), 1);
        assert!(any_block_contains(content, &found, "code"));
    }

    #[test]
    fn test_nested_fence_markers() {
        let content = "Text\n````\n```\nnested\n```\n````\nAfter";
        let found = detect(content);
        assert!(!found.is_empty());
        assert!(any_block_contains(content, &found, "nested"));
    }

    #[test]
    fn test_indented_code_with_blank_lines() {
        let content = "Text\n\n line1\n\n line2\n\nAfter";
        let found = detect(content);
        assert!(!found.is_empty());

        // Concatenate the text of every detected range.
        let joined: String = found.iter().map(|&range| text_of(content, range)).collect();
        assert!(joined.contains("line1") || text_of(content, found[0]).contains("line1"));
    }

    #[test]
    fn test_code_span_with_spaces() {
        let content = "Text ` code with spaces ` more";
        assert_eq!(detect(content).len(), 0);
    }

    #[test]
    fn test_fenced_block_with_info_string() {
        let content = "```rust,no_run,should_panic\ncode\n```";
        let found = detect(content);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].0, 0);
    }

    #[test]
    fn test_indented_fences_not_code_blocks() {
        let content = "Text\n ```\n code\n ```\nAfter";
        // Leading whitespace is trimmed before fence detection, so this still
        // yields exactly one block.
        assert_eq!(detect(content).len(), 1);
    }
}