1use crate::utils::range_utils::LineIndex;
2use lazy_static::lazy_static;
3use regex::Regex;
4use std::fmt;
5
lazy_static! {
    // Backtick fence line: optional leading whitespace, three backticks, then any text that
    // contains no backtick (for example an info string such as `rust`) up to the end of the line.
    // A bare "```" line matches this pattern as well.
    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap();
    // Backtick fence line followed by nothing but whitespace.
    static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)```\s*$").unwrap();
    // Tilde fence line: optional leading whitespace, three tildes, then any text without a tilde.
    static ref ALTERNATE_FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap();
    // Tilde fence line followed by nothing but whitespace.
    static ref ALTERNATE_FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)~~~\s*$").unwrap();
    // Line that starts with at least four whitespace characters.
    static ref INDENTED_CODE_BLOCK: Regex = Regex::new(r"^(\s{4,})").unwrap();
    // List item: optional indentation, a bullet (`*`, `+`, `-`) or a number followed by `.` or `)`,
    // optional whitespace, then the rest of the line.
    static ref LIST_ITEM_RE: Regex = Regex::new(r"^(\s*)([*+-]|\d+[.)])(\s*)(.*)$").unwrap();
}
15
16pub struct CodeBlockUtils;
18
19impl CodeBlockUtils {
20 pub fn is_in_code_block(content: &str, line_num: usize) -> bool {
22 let lines: Vec<&str> = content.lines().collect();
23 if line_num >= lines.len() {
24 return false;
25 }
26
27 let mut in_fenced_code = false;
28 let mut in_alternate_fenced = false;
29
30 for (i, line) in lines.iter().enumerate() {
31 if i > line_num {
32 break;
33 }
34
35 if FENCED_CODE_BLOCK_START.is_match(line) {
36 in_fenced_code = !in_fenced_code;
37 } else if FENCED_CODE_BLOCK_END.is_match(line) && in_fenced_code {
38 in_fenced_code = false;
39 } else if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
40 in_alternate_fenced = !in_alternate_fenced;
41 } else if ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) && in_alternate_fenced {
42 in_alternate_fenced = false;
43 }
44 }
45
46 if line_num < lines.len() && Self::is_indented_code_block(lines[line_num]) {
48 return true;
49 }
50
51 in_fenced_code || in_alternate_fenced
53 }
54
55 pub fn is_code_block_delimiter(line: &str) -> bool {
57 FENCED_CODE_BLOCK_START.is_match(line)
58 || FENCED_CODE_BLOCK_END.is_match(line)
59 || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
60 || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
61 }
62
63 pub fn is_code_block_start(line: &str) -> bool {
65 FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
66 }
67
68 pub fn is_code_block_end(line: &str) -> bool {
70 FENCED_CODE_BLOCK_END.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
71 }
72
73 pub fn is_indented_code_block(line: &str) -> bool {
75 let expanded_line = line.replace('\t', " ");
77 INDENTED_CODE_BLOCK.is_match(&expanded_line)
78 }
79
80 pub fn get_language_specifier(line: &str) -> Option<String> {
106 if FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
107 let trimmed = line.trim_start();
108 let after_fence = &trimmed[3..].trim_start();
109 if !after_fence.is_empty() {
110 return Some(after_fence.to_string());
111 }
112 }
113 None
114 }
115
116 pub fn identify_code_block_lines(content: &str) -> Vec<bool> {
145 let lines: Vec<&str> = content.lines().collect();
146 let mut in_code_block = vec![false; lines.len()];
147
148 let mut in_fenced_code = false;
149 let mut in_alternate_fenced = false;
150
151 for (i, line) in lines.iter().enumerate() {
152 let trimmed = line.trim_start();
154
155 if trimmed.starts_with("```") {
156 if FENCED_CODE_BLOCK_START.is_match(line) {
157 in_fenced_code = !in_fenced_code;
158 in_code_block[i] = true; } else if in_fenced_code && FENCED_CODE_BLOCK_END.is_match(line) {
160 in_fenced_code = false;
161 in_code_block[i] = true; }
163 } else if trimmed.starts_with("~~~") {
164 if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
165 in_alternate_fenced = !in_alternate_fenced;
166 in_code_block[i] = true; } else if in_alternate_fenced && ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) {
168 in_alternate_fenced = false;
169 in_code_block[i] = true; }
171 }
172
173 if in_fenced_code || in_alternate_fenced {
175 in_code_block[i] = true;
176 } else if !in_code_block[i] {
177 if (line.starts_with(" ") || INDENTED_CODE_BLOCK.is_match(line)) && !LIST_ITEM_RE.is_match(line) {
180 in_code_block[i] = true;
181 }
182 }
183 }
184
185 in_code_block
186 }
187}
188
lazy_static! {
    // Line that begins (with no leading whitespace) with three backticks or three tildes.
    static ref FENCED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(?:```|~~~)").unwrap();
    // Line that starts with at least four whitespace characters.
    static ref INDENTED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(\s{4,})").unwrap();
    // A run of one or more consecutive backticks.
    static ref BACKTICK_PATTERN: Regex = Regex::new(r"(`+)").unwrap();
}
195
/// Classification of a single line with respect to code blocks.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CodeBlockState {
    /// The line is not part of any code block.
    None,
    /// The line is a fence line or lies between an opening and a closing fence.
    Fenced,
    /// The line belongs to an indented code block.
    Indented,
}
203
204#[derive(Debug)]
206pub struct CodeBlockInfo {
207 pub block_states: Vec<CodeBlockState>,
209 pub code_spans: Vec<(usize, usize)>,
211 content: String,
213 line_index: LineIndex,
215}
216
217impl CodeBlockInfo {
218 pub fn new(content: &str) -> Self {
220 let block_states = compute_code_blocks(content);
221 let code_spans = compute_code_spans(content);
222 let line_index = LineIndex::new(content.to_string());
223
224 CodeBlockInfo {
225 block_states,
226 code_spans,
227 content: content.to_string(),
228 line_index,
229 }
230 }
231
232 pub fn is_in_code_block(&self, line_index: usize) -> bool {
234 if line_index < self.block_states.len() {
235 self.block_states[line_index] != CodeBlockState::None
236 } else {
237 false
238 }
239 }
240
241 pub fn is_in_code_span(&self, line_index: usize, column_index: usize) -> bool {
243 let line_start = self
245 .line_index
246 .get_line_start_byte(line_index + 1)
247 .unwrap_or(self.content.len());
248 let position = line_start + column_index;
249
250 for &(start, end) in &self.code_spans {
252 if position >= start && position <= end {
253 return true;
254 }
255 }
256
257 false
258 }
259
260 pub fn has_code_blocks(&self) -> bool {
262 self.block_states.iter().any(|state| *state != CodeBlockState::None)
263 }
264
265 pub fn has_code_spans(&self) -> bool {
267 !self.code_spans.is_empty()
268 }
269}
270
271pub fn compute_code_blocks(content: &str) -> Vec<CodeBlockState> {
273 let mut in_fenced_block = false;
274 let mut result = Vec::new();
275 let mut fence_marker = "";
276
277 for line in content.lines() {
278 if in_fenced_block {
279 if line.trim().starts_with(fence_marker) {
280 in_fenced_block = false;
281 result.push(CodeBlockState::Fenced); } else {
283 result.push(CodeBlockState::Fenced);
284 }
285 } else if FENCED_CODE_BLOCK_PATTERN.is_match(line) {
286 in_fenced_block = true;
287 fence_marker = if line.trim().starts_with("```") { "```" } else { "~~~" };
288 result.push(CodeBlockState::Fenced); } else if !line.trim().is_empty() {
290 let expanded_line = line.replace('\t', " ");
292 if INDENTED_CODE_BLOCK_PATTERN.is_match(&expanded_line) {
293 result.push(CodeBlockState::Indented);
294 } else {
295 result.push(CodeBlockState::None);
296 }
297 } else {
298 result.push(CodeBlockState::None);
299 }
300 }
301
302 result
303}
304
/// Finds inline code spans in `content`.
///
/// A span starts at a run of backticks that is not preceded by a backslash and does not look
/// like a fence line (see [`is_likely_code_block_delimiter`]), and ends at the next run of
/// exactly the same length that is not preceded by a backslash. Unterminated runs are skipped.
///
/// Returns `(start, end)` byte offsets into `content`; `end` is exclusive, so
/// `&content[start..end]` is the span including its delimiters.
pub fn compute_code_spans(content: &str) -> Vec<(usize, usize)> {
    /// Returns `true` when the character at `idx` is directly preceded by a backslash.
    fn is_escaped(chars: &[char], idx: usize) -> bool {
        idx > 0 && chars[idx - 1] == '\\'
    }

    /// Returns the index just past the run of backticks that starts at `start`.
    fn backtick_run_end(chars: &[char], start: usize) -> usize {
        start + chars[start..].iter().take_while(|&&c| c == '`').count()
    }

    let chars: Vec<char> = content.chars().collect();
    // `byte_offsets[k]` is the byte offset of `chars[k]`; the extra last entry is the content
    // length, so the offset just past the final character can be looked up as well.
    let byte_offsets: Vec<usize> = content
        .char_indices()
        .map(|(offset, _)| offset)
        .chain(std::iter::once(content.len()))
        .collect();

    let mut spans = Vec::new();
    let mut i = 0;

    while i < chars.len() {
        if chars[i] != '`' || is_escaped(&chars, i) {
            i += 1;
            continue;
        }

        let open_start = i;
        let open_end = backtick_run_end(&chars, open_start);
        let open_len = open_end - open_start;
        // Whatever happens below, scanning resumes after the opening run at the earliest.
        i = open_end;

        if is_likely_code_block_delimiter(&chars, open_start) {
            continue;
        }

        let mut j = open_end;
        while j < chars.len() {
            if chars[j] != '`' || is_escaped(&chars, j) {
                j += 1;
                continue;
            }

            let close_end = backtick_run_end(&chars, j);
            if close_end - j == open_len {
                spans.push((byte_offsets[open_start], byte_offsets[close_end]));
                i = close_end;
                break;
            }

            // A run of a different length cannot close this span; keep looking after it.
            j = close_end;
        }
    }

    spans
}

/// Reports whether the backtick run starting at `start_idx` looks like a fence line marker.
///
/// That is the case when the run is at least three backticks long and only whitespace
/// separates it from the start of its line (or from the start of the content).
fn is_likely_code_block_delimiter(chars: &[char], start_idx: usize) -> bool {
    let run_len = chars
        .iter()
        .skip(start_idx)
        .take_while(|&&c| c == '`')
        .count();
    if run_len < 3 {
        return false;
    }

    // Walk back to the previous newline (or the very first character, which must be checked
    // too) and require that everything in between is whitespace.
    chars[..start_idx]
        .iter()
        .rev()
        .take_while(|&&c| c != '\n')
        .all(|c| c.is_whitespace())
}
423
/// Preferred style for code blocks.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CodeBlockStyle {
    /// The default variant; displayed as `consistent`.
    #[default]
    Consistent,
    /// Indented code blocks; displayed as `indented`.
    Indented,
    /// Fenced code blocks; displayed as `fenced`.
    Fenced,
}

impl fmt::Display for CodeBlockStyle {
    /// Writes the lowercase name of the style.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            CodeBlockStyle::Consistent => "consistent",
            CodeBlockStyle::Indented => "indented",
            CodeBlockStyle::Fenced => "fenced",
        };
        f.write_str(label)
    }
}
445
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_in_code_block() {
        let content = "Normal text
```rust
let x = 1;
```
More text";

        assert!(!CodeBlockUtils::is_in_code_block(content, 0));
        assert!(CodeBlockUtils::is_in_code_block(content, 1));
        assert!(CodeBlockUtils::is_in_code_block(content, 2));
        // The closing fence line is reported as outside the block.
        assert!(!CodeBlockUtils::is_in_code_block(content, 3));
        assert!(!CodeBlockUtils::is_in_code_block(content, 4));

        // Tilde fences
        let content2 = "Text\n~~~\ncode\n~~~\nEnd";
        assert!(!CodeBlockUtils::is_in_code_block(content2, 0));
        assert!(CodeBlockUtils::is_in_code_block(content2, 1));
        assert!(CodeBlockUtils::is_in_code_block(content2, 2));
        assert!(!CodeBlockUtils::is_in_code_block(content2, 3));
        assert!(!CodeBlockUtils::is_in_code_block(content2, 4));

        // Indented code needs at least four columns of indentation.
        let content3 = "Normal\n    indented code\nNormal";
        assert!(!CodeBlockUtils::is_in_code_block(content3, 0));
        assert!(CodeBlockUtils::is_in_code_block(content3, 1));
        assert!(!CodeBlockUtils::is_in_code_block(content3, 2));

        // Out-of-range line number
        assert!(!CodeBlockUtils::is_in_code_block("test", 10));
    }

    #[test]
    fn test_is_code_block_delimiter() {
        assert!(CodeBlockUtils::is_code_block_delimiter("```"));
        assert!(CodeBlockUtils::is_code_block_delimiter("```rust"));
        assert!(CodeBlockUtils::is_code_block_delimiter(" ```"));
        assert!(CodeBlockUtils::is_code_block_delimiter("~~~"));
        assert!(CodeBlockUtils::is_code_block_delimiter("~~~python"));

        assert!(!CodeBlockUtils::is_code_block_delimiter("Normal text"));
        assert!(!CodeBlockUtils::is_code_block_delimiter("``"));
        assert!(!CodeBlockUtils::is_code_block_delimiter("~"));
        assert!(!CodeBlockUtils::is_code_block_delimiter(""));
    }

    #[test]
    fn test_is_code_block_start() {
        assert!(CodeBlockUtils::is_code_block_start("```"));
        assert!(CodeBlockUtils::is_code_block_start("```rust"));
        assert!(CodeBlockUtils::is_code_block_start("~~~"));
        assert!(CodeBlockUtils::is_code_block_start("~~~python"));
        assert!(CodeBlockUtils::is_code_block_start(" ```"));

        assert!(!CodeBlockUtils::is_code_block_start("Normal text"));
        assert!(!CodeBlockUtils::is_code_block_start(""));
    }

    #[test]
    fn test_is_code_block_end() {
        assert!(CodeBlockUtils::is_code_block_end("```"));
        assert!(CodeBlockUtils::is_code_block_end("~~~"));
        assert!(CodeBlockUtils::is_code_block_end(" ```"));
        assert!(CodeBlockUtils::is_code_block_end("``` "));

        // A fence with an info string is not an end marker.
        assert!(!CodeBlockUtils::is_code_block_end("```rust"));
        assert!(!CodeBlockUtils::is_code_block_end("~~~python"));
        assert!(!CodeBlockUtils::is_code_block_end("Normal text"));
    }

    #[test]
    fn test_is_indented_code_block() {
        assert!(CodeBlockUtils::is_indented_code_block("    code"));
        assert!(CodeBlockUtils::is_indented_code_block("        more indented"));

        // Tabs count as four columns each.
        assert!(CodeBlockUtils::is_indented_code_block("\tcode"));
        assert!(CodeBlockUtils::is_indented_code_block("\t\tcode"));
        assert!(CodeBlockUtils::is_indented_code_block(" \tcode"));

        // Fewer than four columns is not an indented code block.
        assert!(!CodeBlockUtils::is_indented_code_block(" code"));
        assert!(!CodeBlockUtils::is_indented_code_block("   code"));
        assert!(!CodeBlockUtils::is_indented_code_block("normal text"));
        assert!(!CodeBlockUtils::is_indented_code_block(""));
    }

    #[test]
    fn test_get_language_specifier() {
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```rust"),
            Some("rust".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("~~~python"),
            Some("python".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```javascript"),
            Some("javascript".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier(" ```rust"),
            Some("rust".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```rust ignore"),
            Some("rust ignore".to_string())
        );

        assert_eq!(CodeBlockUtils::get_language_specifier("```"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier("~~~"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier("Normal text"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier(""), None);
    }

    #[test]
    fn test_identify_code_block_lines() {
        let content = "Normal text
```rust
let x = 1;
```
More text";

        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![false, true, true, true, false]);

        // Tilde fences
        let content2 = "Text\n~~~\ncode\n~~~\nEnd";
        let result2 = CodeBlockUtils::identify_code_block_lines(content2);
        assert_eq!(result2, vec![false, true, true, true, false]);

        // Indented code
        let content3 = "Normal\n    code\n    more code\nNormal";
        let result3 = CodeBlockUtils::identify_code_block_lines(content3);
        assert_eq!(result3, vec![false, true, true, false]);

        // Indented list items are not code blocks.
        let content4 = "List:\n    * Item 1\n    * Item 2";
        let result4 = CodeBlockUtils::identify_code_block_lines(content4);
        assert_eq!(result4, vec![false, false, false]);
    }

    #[test]
    fn test_code_block_state_enum() {
        assert_eq!(CodeBlockState::None, CodeBlockState::None);
        assert_eq!(CodeBlockState::Fenced, CodeBlockState::Fenced);
        assert_eq!(CodeBlockState::Indented, CodeBlockState::Indented);
        assert_ne!(CodeBlockState::None, CodeBlockState::Fenced);
    }

    #[test]
    fn test_code_block_info() {
        let content = "Normal\n```\ncode\n```\nText";
        let info = CodeBlockInfo::new(content);

        assert!(!info.is_in_code_block(0));
        assert!(info.is_in_code_block(1));
        assert!(info.is_in_code_block(2));
        assert!(info.is_in_code_block(3));
        assert!(!info.is_in_code_block(4));

        assert!(info.has_code_blocks());

        // Out-of-range line index
        assert!(!info.is_in_code_block(100));
    }

    #[test]
    fn test_code_block_info_code_spans() {
        let content = "Text with `inline code` here";
        let info = CodeBlockInfo::new(content);

        assert!(info.has_code_spans());
        assert!(!info.has_code_blocks());

        assert!(info.is_in_code_span(0, 11));
        assert!(info.is_in_code_span(0, 15));
        assert!(!info.is_in_code_span(0, 5));
        assert!(!info.is_in_code_span(0, 25));
    }

    #[test]
    fn test_compute_code_blocks() {
        let content = "Normal\n```\ncode\n```\n    indented";
        let states = compute_code_blocks(content);

        assert_eq!(states[0], CodeBlockState::None);
        assert_eq!(states[1], CodeBlockState::Fenced);
        assert_eq!(states[2], CodeBlockState::Fenced);
        assert_eq!(states[3], CodeBlockState::Fenced);
        assert_eq!(states[4], CodeBlockState::Indented);
    }

    #[test]
    fn test_compute_code_spans() {
        let content = "Text `code` and ``double`` backticks";
        let spans = compute_code_spans(content);

        assert_eq!(spans.len(), 2);
        assert_eq!(&content[spans[0].0..spans[0].1], "`code`");
        assert_eq!(&content[spans[1].0..spans[1].1], "``double``");

        // Escaped backticks do not open a span.
        let content2 = r"Text \`not code\` but `real code`";
        let spans2 = compute_code_spans(content2);
        assert_eq!(spans2.len(), 1);
        assert!(content2[spans2[0].0..spans2[0].1].contains("real code"));
    }

    #[test]
    fn test_code_block_style() {
        assert_eq!(CodeBlockStyle::Fenced.to_string(), "fenced");
        assert_eq!(CodeBlockStyle::Indented.to_string(), "indented");
        assert_eq!(CodeBlockStyle::Consistent.to_string(), "consistent");

        assert_eq!(CodeBlockStyle::default(), CodeBlockStyle::Consistent);
    }

    #[test]
    fn test_nested_code_blocks() {
        // Each fence line toggles the block, so the middle line ends up outside any block.
        let content = "```\n```\ncode\n```\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, false, true, true]);
    }

    #[test]
    fn test_unicode_content() {
        let content = "```rust\nlet 你好 = \"世界\";\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, true]);

        assert_eq!(CodeBlockUtils::get_language_specifier("```🦀"), Some("🦀".to_string()));
    }

    #[test]
    fn test_edge_cases() {
        // Empty content
        assert_eq!(CodeBlockUtils::identify_code_block_lines(""), Vec::<bool>::new());
        assert!(!CodeBlockUtils::is_in_code_block("", 0));

        // A lone fence line
        assert_eq!(CodeBlockUtils::identify_code_block_lines("```"), vec![true]);
        assert_eq!(CodeBlockUtils::identify_code_block_lines("~~~"), vec![true]);

        // Mixed fence kinds
        let content = "```\ncode\n~~~\nmore\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, true, true, true]);
    }
}