1use crate::utils::element_cache::ElementCache;
2use crate::utils::range_utils::LineIndex;
3use regex::Regex;
4use std::fmt;
5use std::sync::LazyLock;
6
7static FENCED_CODE_BLOCK_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap());
9static FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```\s*$").unwrap());
10static ALTERNATE_FENCED_CODE_BLOCK_START: LazyLock<Regex> =
11 LazyLock::new(|| Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap());
12static ALTERNATE_FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)~~~\s*$").unwrap());
13static LIST_ITEM_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)([*+-]|\d+[.)])(\s*)(.*)$").unwrap());
14
15pub struct CodeBlockUtils;
17
18impl CodeBlockUtils {
19 pub fn is_in_code_block(content: &str, line_num: usize) -> bool {
21 let lines: Vec<&str> = content.lines().collect();
22 if line_num >= lines.len() {
23 return false;
24 }
25
26 let mut in_fenced_code = false;
27 let mut in_alternate_fenced = false;
28
29 for (i, line) in lines.iter().enumerate() {
30 if i > line_num {
31 break;
32 }
33
34 if FENCED_CODE_BLOCK_START.is_match(line) {
35 in_fenced_code = !in_fenced_code;
36 } else if FENCED_CODE_BLOCK_END.is_match(line) && in_fenced_code {
37 in_fenced_code = false;
38 } else if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
39 in_alternate_fenced = !in_alternate_fenced;
40 } else if ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) && in_alternate_fenced {
41 in_alternate_fenced = false;
42 }
43 }
44
45 if line_num < lines.len() && Self::is_indented_code_block(lines[line_num]) {
47 return true;
48 }
49
50 in_fenced_code || in_alternate_fenced
52 }
53
54 pub fn is_code_block_delimiter(line: &str) -> bool {
56 FENCED_CODE_BLOCK_START.is_match(line)
57 || FENCED_CODE_BLOCK_END.is_match(line)
58 || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
59 || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
60 }
61
62 pub fn is_code_block_start(line: &str) -> bool {
64 FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
65 }
66
67 pub fn is_code_block_end(line: &str) -> bool {
69 FENCED_CODE_BLOCK_END.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
70 }
71
72 pub fn is_indented_code_block(line: &str) -> bool {
74 ElementCache::calculate_indentation_width_default(line) >= 4
76 }
77
78 pub fn get_language_specifier(line: &str) -> Option<String> {
104 if FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
105 let trimmed = line.trim_start();
106 let after_fence = &trimmed[3..].trim_start();
107 if !after_fence.is_empty() {
108 return Some(after_fence.to_string());
109 }
110 }
111 None
112 }
113
114 pub fn identify_code_block_lines(content: &str) -> Vec<bool> {
143 let lines: Vec<&str> = content.lines().collect();
144 let mut in_code_block = vec![false; lines.len()];
145
146 let mut in_fenced_code = false;
147 let mut in_alternate_fenced = false;
148
149 for (i, line) in lines.iter().enumerate() {
150 let trimmed = line.trim_start();
152
153 if trimmed.starts_with("```") {
154 if FENCED_CODE_BLOCK_START.is_match(line) {
155 in_fenced_code = !in_fenced_code;
156 in_code_block[i] = true; } else if in_fenced_code && FENCED_CODE_BLOCK_END.is_match(line) {
158 in_fenced_code = false;
159 in_code_block[i] = true; }
161 } else if trimmed.starts_with("~~~") {
162 if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
163 in_alternate_fenced = !in_alternate_fenced;
164 in_code_block[i] = true; } else if in_alternate_fenced && ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) {
166 in_alternate_fenced = false;
167 in_code_block[i] = true; }
169 }
170
171 if in_fenced_code || in_alternate_fenced {
173 in_code_block[i] = true;
174 } else if !in_code_block[i] {
175 if ElementCache::calculate_indentation_width_default(line) >= 4 && !LIST_ITEM_RE.is_match(line) {
178 in_code_block[i] = true;
179 }
180 }
181 }
182
183 in_code_block
184 }
185}
186
187static FENCED_CODE_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(?:```|~~~)").unwrap());
189
/// Per-line classification of code block membership.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodeBlockState {
    /// The line is not part of any code block.
    None,
    /// The line belongs to a fenced code block (fence lines included).
    Fenced,
    /// The line belongs to an indented code block.
    Indented,
}
197
198#[derive(Debug)]
200pub struct CodeBlockInfo<'a> {
201 pub block_states: Vec<CodeBlockState>,
203 pub code_spans: Vec<(usize, usize)>,
205 content: &'a str,
207 line_index: LineIndex<'a>,
209}
210
211impl<'a> CodeBlockInfo<'a> {
212 pub fn new(content: &'a str) -> Self {
214 let block_states = compute_code_blocks(content);
215 let code_spans = compute_code_spans(content);
216 let line_index = LineIndex::new(content);
217
218 CodeBlockInfo {
219 block_states,
220 code_spans,
221 content,
222 line_index,
223 }
224 }
225
226 pub fn is_in_code_block(&self, line_index: usize) -> bool {
228 if line_index < self.block_states.len() {
229 self.block_states[line_index] != CodeBlockState::None
230 } else {
231 false
232 }
233 }
234
235 pub fn is_in_code_span(&self, line_index: usize, column_index: usize) -> bool {
237 let line_start = self
239 .line_index
240 .get_line_start_byte(line_index + 1)
241 .unwrap_or(self.content.len());
242 let position = line_start + column_index;
243
244 for &(start, end) in &self.code_spans {
246 if position >= start && position <= end {
247 return true;
248 }
249 }
250
251 false
252 }
253
254 pub fn has_code_blocks(&self) -> bool {
256 self.block_states.iter().any(|state| *state != CodeBlockState::None)
257 }
258
259 pub fn has_code_spans(&self) -> bool {
261 !self.code_spans.is_empty()
262 }
263}
264
265pub fn compute_code_blocks(content: &str) -> Vec<CodeBlockState> {
267 let mut in_fenced_block = false;
268 let mut result = Vec::new();
269 let mut fence_marker = "";
270
271 for line in content.lines() {
272 if in_fenced_block {
273 if line.trim().starts_with(fence_marker) {
274 in_fenced_block = false;
275 result.push(CodeBlockState::Fenced); } else {
277 result.push(CodeBlockState::Fenced);
278 }
279 } else if FENCED_CODE_BLOCK_PATTERN.is_match(line) {
280 in_fenced_block = true;
281 fence_marker = if line.trim().starts_with("```") { "```" } else { "~~~" };
282 result.push(CodeBlockState::Fenced); } else if !line.trim().is_empty() {
284 if ElementCache::calculate_indentation_width_default(line) >= 4 {
286 result.push(CodeBlockState::Indented);
287 } else {
288 result.push(CodeBlockState::None);
289 }
290 } else {
291 result.push(CodeBlockState::None);
292 }
293 }
294
295 result
296}
297
298pub fn compute_code_spans(content: &str) -> Vec<(usize, usize)> {
300 let mut spans = Vec::new();
301
302 let chars: Vec<char> = content.chars().collect();
304 let mut i = 0;
305
306 while i < chars.len() {
307 if i > 0 && chars[i] == '`' && chars[i - 1] == '\\' {
309 i += 1;
310 continue;
311 }
312
313 if chars[i] == '`' {
315 let mut backtick_count = 1;
316 let start_idx = i;
317
318 i += 1;
320 while i < chars.len() && chars[i] == '`' {
321 backtick_count += 1;
322 i += 1;
323 }
324
325 if is_likely_code_block_delimiter(&chars, start_idx) {
328 continue;
329 }
330
331 let mut j = i;
333 let mut found_closing = false;
334
335 while j < chars.len() {
336 if j > 0 && chars[j] == '`' && chars[j - 1] == '\\' {
338 j += 1;
339 continue;
340 }
341
342 if chars[j] == '`' {
343 let mut closing_count = 1;
344 let potential_end = j;
345
346 j += 1;
348 while j < chars.len() && chars[j] == '`' {
349 closing_count += 1;
350 j += 1;
351 }
352
353 if closing_count == backtick_count {
355 let start_byte = chars[..start_idx].iter().map(|c| c.len_utf8()).sum();
357 let end_byte = chars[..potential_end + closing_count]
358 .iter()
359 .map(|c| c.len_utf8())
360 .sum();
361
362 spans.push((start_byte, end_byte));
363 i = j; found_closing = true;
365 break;
366 }
367 }
368
369 j += 1;
370 }
371
372 if !found_closing {
373 continue;
375 }
376 } else {
377 i += 1;
378 }
379 }
380
381 spans
382}
383
/// Heuristic: does the backtick run starting at `start_idx` look like a fenced code
/// block delimiter rather than an inline code span?
///
/// It does when the run is at least three backticks long and only whitespace
/// separates it from the start of its line (or from the start of `chars`).
/// An out-of-range `start_idx` yields `false`.
fn is_likely_code_block_delimiter(chars: &[char], start_idx: usize) -> bool {
    let run_len = chars
        .iter()
        .skip(start_idx)
        .take_while(|&&c| c == '`')
        .count();

    if run_len < 3 {
        return false;
    }

    // Walk back to the previous newline or to the very first character. Index 0 is
    // inspected as well, so text at the start of the document counts as a prefix.
    chars[..start_idx]
        .iter()
        .rev()
        .take_while(|&&c| c != '\n')
        .all(|c| c.is_whitespace())
}
416
/// Code block style selector; `Consistent` is the default.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
pub enum CodeBlockStyle {
    /// Displayed as "consistent".
    // NOTE(review): presumably "follow whichever style the document already uses" -
    // confirm against the code that consumes this setting.
    #[default]
    Consistent,
    /// Displayed as "indented".
    Indented,
    /// Displayed as "fenced".
    Fenced,
}

impl fmt::Display for CodeBlockStyle {
    /// Writes the lowercase name of the style.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            CodeBlockStyle::Consistent => "consistent",
            CodeBlockStyle::Indented => "indented",
            CodeBlockStyle::Fenced => "fenced",
        };
        f.write_str(label)
    }
}
438
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_in_code_block() {
        let content = "Normal text\n```rust\nlet x = 1;\n```\nMore text";

        assert!(!CodeBlockUtils::is_in_code_block(content, 0));
        assert!(CodeBlockUtils::is_in_code_block(content, 1));
        assert!(CodeBlockUtils::is_in_code_block(content, 2));
        // The closing fence is reported as outside the block.
        assert!(!CodeBlockUtils::is_in_code_block(content, 3));
        assert!(!CodeBlockUtils::is_in_code_block(content, 4));

        // Tilde fences behave the same way.
        let content2 = "Text\n~~~\ncode\n~~~\nEnd";
        assert!(!CodeBlockUtils::is_in_code_block(content2, 0));
        assert!(CodeBlockUtils::is_in_code_block(content2, 1));
        assert!(CodeBlockUtils::is_in_code_block(content2, 2));
        assert!(!CodeBlockUtils::is_in_code_block(content2, 3));
        assert!(!CodeBlockUtils::is_in_code_block(content2, 4));

        // Indented code needs at least four columns of indentation.
        let content3 = "Normal\n    indented code\nNormal";
        assert!(!CodeBlockUtils::is_in_code_block(content3, 0));
        assert!(CodeBlockUtils::is_in_code_block(content3, 1));
        assert!(!CodeBlockUtils::is_in_code_block(content3, 2));

        // Out-of-range line number.
        assert!(!CodeBlockUtils::is_in_code_block("test", 10));
    }

    #[test]
    fn test_is_code_block_delimiter() {
        assert!(CodeBlockUtils::is_code_block_delimiter("```"));
        assert!(CodeBlockUtils::is_code_block_delimiter("```rust"));
        assert!(CodeBlockUtils::is_code_block_delimiter(" ```"));
        assert!(CodeBlockUtils::is_code_block_delimiter("~~~"));
        assert!(CodeBlockUtils::is_code_block_delimiter("~~~python"));

        assert!(!CodeBlockUtils::is_code_block_delimiter("Normal text"));
        assert!(!CodeBlockUtils::is_code_block_delimiter("``"));
        assert!(!CodeBlockUtils::is_code_block_delimiter("~"));
        assert!(!CodeBlockUtils::is_code_block_delimiter(""));
    }

    #[test]
    fn test_is_code_block_start() {
        assert!(CodeBlockUtils::is_code_block_start("```"));
        assert!(CodeBlockUtils::is_code_block_start("```rust"));
        assert!(CodeBlockUtils::is_code_block_start("~~~"));
        assert!(CodeBlockUtils::is_code_block_start("~~~python"));
        assert!(CodeBlockUtils::is_code_block_start(" ```"));

        assert!(!CodeBlockUtils::is_code_block_start("Normal text"));
        assert!(!CodeBlockUtils::is_code_block_start(""));
    }

    #[test]
    fn test_is_code_block_end() {
        assert!(CodeBlockUtils::is_code_block_end("```"));
        assert!(CodeBlockUtils::is_code_block_end("~~~"));
        assert!(CodeBlockUtils::is_code_block_end(" ```"));
        assert!(CodeBlockUtils::is_code_block_end("``` "));

        // A fence followed by text is a start, not an end.
        assert!(!CodeBlockUtils::is_code_block_end("```rust"));
        assert!(!CodeBlockUtils::is_code_block_end("~~~python"));
        assert!(!CodeBlockUtils::is_code_block_end("Normal text"));
    }

    #[test]
    fn test_is_indented_code_block() {
        // Four or more spaces.
        assert!(CodeBlockUtils::is_indented_code_block("    code"));
        assert!(CodeBlockUtils::is_indented_code_block("        more indented"));

        // Tabs, alone or after one to three spaces.
        assert!(CodeBlockUtils::is_indented_code_block("\tcode"));
        assert!(CodeBlockUtils::is_indented_code_block("\t\tcode"));
        assert!(CodeBlockUtils::is_indented_code_block(" \tcode"));
        assert!(CodeBlockUtils::is_indented_code_block("  \tcode"));
        assert!(CodeBlockUtils::is_indented_code_block("   \tcode"));

        // Three spaces or fewer is not enough.
        assert!(!CodeBlockUtils::is_indented_code_block("   code"));
        assert!(!CodeBlockUtils::is_indented_code_block("normal text"));
        assert!(!CodeBlockUtils::is_indented_code_block(""));
    }

    #[test]
    fn test_get_language_specifier() {
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```rust"),
            Some("rust".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("~~~python"),
            Some("python".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```javascript"),
            Some("javascript".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier(" ```rust"),
            Some("rust".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```rust ignore"),
            Some("rust ignore".to_string())
        );

        assert_eq!(CodeBlockUtils::get_language_specifier("```"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier("~~~"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier("Normal text"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier(""), None);
    }

    #[test]
    fn test_identify_code_block_lines() {
        let content = "Normal text\n```rust\nlet x = 1;\n```\nMore text";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![false, true, true, true, false]);

        // Tilde fences.
        let content2 = "Text\n~~~\ncode\n~~~\nEnd";
        let result2 = CodeBlockUtils::identify_code_block_lines(content2);
        assert_eq!(result2, vec![false, true, true, true, false]);

        // Indented code.
        let content3 = "Normal\n    code\n    more code\nNormal";
        let result3 = CodeBlockUtils::identify_code_block_lines(content3);
        assert_eq!(result3, vec![false, true, true, false]);

        // Indented list items are not code.
        let content4 = "List:\n    * Item 1\n    * Item 2";
        let result4 = CodeBlockUtils::identify_code_block_lines(content4);
        assert_eq!(result4, vec![false, false, false]);
    }

    #[test]
    fn test_code_block_state_enum() {
        assert_eq!(CodeBlockState::None, CodeBlockState::None);
        assert_eq!(CodeBlockState::Fenced, CodeBlockState::Fenced);
        assert_eq!(CodeBlockState::Indented, CodeBlockState::Indented);
        assert_ne!(CodeBlockState::None, CodeBlockState::Fenced);
    }

    #[test]
    fn test_code_block_info() {
        let content = "Normal\n```\ncode\n```\nText";
        let info = CodeBlockInfo::new(content);

        assert!(!info.is_in_code_block(0));
        assert!(info.is_in_code_block(1));
        assert!(info.is_in_code_block(2));
        assert!(info.is_in_code_block(3));
        assert!(!info.is_in_code_block(4));

        assert!(info.has_code_blocks());

        // Out-of-range line index.
        assert!(!info.is_in_code_block(100));
    }

    #[test]
    fn test_code_block_info_code_spans() {
        let content = "Text with `inline code` here";
        let info = CodeBlockInfo::new(content);

        assert!(info.has_code_spans());
        assert!(!info.has_code_blocks());

        // Inside the span.
        assert!(info.is_in_code_span(0, 11));
        assert!(info.is_in_code_span(0, 15));
        // Before and after the span.
        assert!(!info.is_in_code_span(0, 5));
        assert!(!info.is_in_code_span(0, 25));
    }

    #[test]
    fn test_compute_code_blocks() {
        let content = "Normal\n```\ncode\n```\n    indented";
        let states = compute_code_blocks(content);

        assert_eq!(states[0], CodeBlockState::None);
        assert_eq!(states[1], CodeBlockState::Fenced);
        assert_eq!(states[2], CodeBlockState::Fenced);
        assert_eq!(states[3], CodeBlockState::Fenced);
        assert_eq!(states[4], CodeBlockState::Indented);
    }

    #[test]
    fn test_compute_code_spans() {
        let content = "Text `code` and ``double`` backticks";
        let spans = compute_code_spans(content);

        assert_eq!(spans.len(), 2);
        assert_eq!(&content[spans[0].0..spans[0].1], "`code`");
        assert_eq!(&content[spans[1].0..spans[1].1], "``double``");

        // Escaped backticks do not delimit a span.
        let content2 = r"Text \`not code\` but `real code`";
        let spans2 = compute_code_spans(content2);
        assert_eq!(spans2.len(), 1);
        assert!(content2[spans2[0].0..spans2[0].1].contains("real code"));
    }

    #[test]
    fn test_code_block_style() {
        assert_eq!(CodeBlockStyle::Fenced.to_string(), "fenced");
        assert_eq!(CodeBlockStyle::Indented.to_string(), "indented");
        assert_eq!(CodeBlockStyle::Consistent.to_string(), "consistent");

        assert_eq!(CodeBlockStyle::default(), CodeBlockStyle::Consistent);
    }

    #[test]
    fn test_nested_code_blocks() {
        // Every bare fence toggles the state, so the middle line ends up outside.
        let content = "```\n```\ncode\n```\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, false, true, true]);
    }

    #[test]
    fn test_unicode_content() {
        let content = "```rust\nlet 你好 = \"世界\";\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, true]);

        assert_eq!(
            CodeBlockUtils::get_language_specifier("```🦀"),
            Some("🦀".to_string())
        );
    }

    #[test]
    fn test_edge_cases() {
        // Empty content.
        assert_eq!(
            CodeBlockUtils::identify_code_block_lines(""),
            Vec::<bool>::new()
        );
        assert!(!CodeBlockUtils::is_in_code_block("", 0));

        // A single fence line.
        assert_eq!(CodeBlockUtils::identify_code_block_lines("```"), vec![true]);
        assert_eq!(CodeBlockUtils::identify_code_block_lines("~~~"), vec![true]);

        // Mixed fence characters.
        let content = "```\ncode\n~~~\nmore\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, true, true, true]);
    }
}