1use regex::Regex;
20use std::sync::LazyLock;
21
22use crate::utils::skip_context::ByteRange;
23
24static DIV_OPEN_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*):::\s*(?:\{[^}]+\}|\S+)").unwrap());
28
29static DIV_CLOSE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*):::\s*$").unwrap());
32
33static CALLOUT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
36 Regex::new(r"^(\s*):::\s*\{[^}]*\.callout-(?:note|warning|tip|important|caution)[^}]*\}").unwrap()
37});
38
39static PANDOC_ATTR_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{[^}]+\}").unwrap());
43
44pub fn is_div_open(line: &str) -> bool {
46 DIV_OPEN_PATTERN.is_match(line)
47}
48
49pub fn is_div_close(line: &str) -> bool {
51 DIV_CLOSE_PATTERN.is_match(line)
52}
53
54pub fn is_callout_open(line: &str) -> bool {
56 CALLOUT_PATTERN.is_match(line)
57}
58
59pub fn has_pandoc_attributes(line: &str) -> bool {
61 PANDOC_ATTR_PATTERN.is_match(line)
62}
63
/// Returns `true` when `lang` (ignoring surrounding whitespace) has the shape
/// `{=NAME}`, where `NAME` is a non-empty run of ASCII letters, digits, `_`
/// or `-`.
pub fn is_pandoc_raw_block_lang(lang: &str) -> bool {
    let unwrapped = lang
        .trim()
        .strip_prefix("{=")
        .and_then(|rest| rest.strip_suffix('}'));
    let Some(format) = unwrapped else {
        return false;
    };

    let is_name_char = |c: char| c.is_ascii_alphanumeric() || matches!(c, '_' | '-');
    !format.trim().is_empty() && format.chars().all(is_name_char)
}
74
/// Returns `true` when `lang` (ignoring surrounding whitespace) is a `{...}`
/// group in which at least one whitespace-separated token is a class of the
/// form `.NAME`, with `NAME` a non-empty run of ASCII letters, digits, `_`
/// or `-`.
pub fn is_pandoc_code_class_attr(lang: &str) -> bool {
    let unwrapped = lang
        .trim()
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'));
    let Some(attrs) = unwrapped else {
        return false;
    };

    let is_name_char = |c: char| c.is_ascii_alphanumeric() || matches!(c, '_' | '-');
    attrs.split_whitespace().any(|token| {
        token
            .strip_prefix('.')
            .is_some_and(|name| !name.is_empty() && name.chars().all(is_name_char))
    })
}
96
/// Measures the leading indentation of `line`: each space counts as one
/// column and each tab as four; counting stops at the first other character.
pub fn get_div_indent(line: &str) -> usize {
    line.chars()
        .map_while(|ch| match ch {
            ' ' => Some(1),
            '\t' => Some(4),
            _ => None,
        })
        .sum()
}
109
110#[derive(Debug, Clone, Default)]
112pub struct DivTracker {
113 indent_stack: Vec<usize>,
115}
116
117impl DivTracker {
118 pub fn new() -> Self {
119 Self::default()
120 }
121
122 pub fn process_line(&mut self, line: &str) -> bool {
124 let trimmed = line.trim_start();
125
126 if trimmed.starts_with(":::") {
127 let indent = get_div_indent(line);
128
129 if is_div_close(line) {
130 if let Some(&top_indent) = self.indent_stack.last()
133 && top_indent >= indent
134 {
135 self.indent_stack.pop();
136 }
137 } else if is_div_open(line) {
138 self.indent_stack.push(indent);
140 }
141 }
142
143 !self.indent_stack.is_empty()
144 }
145
146 pub fn is_inside_div(&self) -> bool {
148 !self.indent_stack.is_empty()
149 }
150}
151
152pub fn detect_div_block_ranges(content: &str) -> Vec<ByteRange> {
155 let mut ranges = Vec::new();
156 let mut tracker = DivTracker::new();
157 let mut div_start: Option<usize> = None;
158 let mut byte_offset = 0;
159
160 for line in content.lines() {
161 let line_len = line.len();
162 let was_inside = tracker.is_inside_div();
163 let is_inside = tracker.process_line(line);
164
165 if !was_inside && is_inside {
167 div_start = Some(byte_offset);
168 }
169 else if was_inside
171 && !is_inside
172 && let Some(start) = div_start.take()
173 {
174 ranges.push(ByteRange {
176 start,
177 end: byte_offset + line_len,
178 });
179 }
180
181 byte_offset += line_len + 1;
183 }
184
185 if let Some(start) = div_start {
187 ranges.push(ByteRange {
188 start,
189 end: content.len(),
190 });
191 }
192
193 ranges
194}
195
196pub fn is_within_div_block_ranges(ranges: &[ByteRange], position: usize) -> bool {
198 ranges.iter().any(|r| position >= r.start && position < r.end)
199}
200
201static BRACKETED_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
223 Regex::new(r"\[(?:[^\]@]*[^A-Za-z0-9_])?@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*[^\]]*\]").unwrap()
224});
225
226static INLINE_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
230 Regex::new(r"(?:^|[\s\(\[\{,;:])(@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*)").unwrap()
232});
233
234static LINK_LABEL_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\[[^\]]*\])(?:\(|\[)").unwrap());
241
/// Cheap pre-check: returns `true` when `text` contains an `@` at all.
///
/// This does not validate citation syntax, so e-mail addresses also return
/// `true`.
#[inline]
pub fn has_citations(text: &str) -> bool {
    // `@` is ASCII, so a byte scan is equivalent to a char scan on UTF-8.
    text.bytes().any(|byte| byte == b'@')
}
247
248static INLINE_FOOTNOTE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?:^|[^\w!])(\^\[[^\]]*\])").unwrap());
262
/// Builds an identifier slug from heading text.
///
/// Alphanumeric characters, `_`, `-` and `.` are kept (lower-cased); a
/// whitespace character becomes `-` unless the slug already ends in `-`;
/// everything else is dropped. Leading and trailing `-` are trimmed, and an
/// empty result is replaced by `"section"`.
pub fn pandoc_header_slug(text: &str) -> String {
    let mut slug = String::with_capacity(text.len());

    for ch in text.chars() {
        let kept = ch.is_alphanumeric() || matches!(ch, '_' | '-' | '.');
        if kept {
            slug.extend(ch.to_lowercase());
        } else if ch.is_whitespace() && !slug.ends_with('-') {
            slug.push('-');
        }
    }

    match slug.trim_matches('-') {
        "" => String::from("section"),
        core => core.to_owned(),
    }
}
294
295pub fn collect_pandoc_header_slugs(content: &str) -> std::collections::HashSet<String> {
310 use std::collections::{HashMap, HashSet};
311 let mut slugs = HashSet::new();
312 let mut base_counts: HashMap<String, usize> = HashMap::new();
313 let mut in_fence = false;
314 let mut fence_marker: Option<char> = None;
315 for line in content.lines() {
316 let trimmed = line.trim_start();
317 if let Some(c) = trimmed.chars().next()
321 && (c == '`' || c == '~')
322 {
323 let count = trimmed.chars().take_while(|&ch| ch == c).count();
324 if count >= 3 {
325 match fence_marker {
326 None => {
327 in_fence = true;
328 fence_marker = Some(c);
329 }
330 Some(m) if m == c => {
331 in_fence = false;
332 fence_marker = None;
333 }
334 _ => {}
335 }
336 continue;
337 }
338 }
339 if in_fence {
340 continue;
341 }
342 if let Some(rest) = trimmed.strip_prefix('#') {
343 let mut text = rest.trim_start_matches('#').trim();
344 if let Some(idx) = text.rfind(" {")
349 && let Some(close_rel) = text[idx + 2..].find('}')
350 && text[idx + 2 + close_rel + 1..].trim().is_empty()
351 {
352 text = &text[..idx];
353 }
354 let base = pandoc_header_slug(text);
355 let count = base_counts.entry(base.clone()).or_insert(0);
356 let slug = if *count == 0 {
357 base.clone()
358 } else {
359 format!("{base}-{count}")
360 };
361 *count += 1;
362 slugs.insert(slug);
363 }
364 }
365 slugs
366}
367
368static SUBSCRIPT_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"~[^\s~]+~").unwrap());
383
384static SUPERSCRIPT_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\^[^\s^]+\^").unwrap());
386
387pub fn detect_subscript_superscript_ranges(content: &str) -> Vec<ByteRange> {
396 let bytes = content.as_bytes();
397 let mut ranges = Vec::new();
398
399 for m in SUBSCRIPT_PATTERN.find_iter(content) {
400 let prev = m.start().checked_sub(1).map_or(0, |i| bytes[i]);
402 let next = bytes.get(m.end()).copied().unwrap_or(0);
403 if prev != b'~' && next != b'~' {
404 ranges.push(ByteRange {
405 start: m.start(),
406 end: m.end(),
407 });
408 }
409 }
410 for m in SUPERSCRIPT_PATTERN.find_iter(content) {
411 let prev = m.start().checked_sub(1).map_or(0, |i| bytes[i]);
413 let next = bytes.get(m.end()).copied().unwrap_or(0);
414 if prev != b'^' && next != b'^' {
415 ranges.push(ByteRange {
416 start: m.start(),
417 end: m.end(),
418 });
419 }
420 }
421 ranges.sort_by_key(|r| r.start);
423 ranges
424}
425
426static INLINE_CODE_ATTR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`[^`]*`(\{[^}]+\})").unwrap());
439
440pub fn detect_inline_code_attr_ranges(content: &str) -> Vec<ByteRange> {
446 let mut ranges = Vec::new();
447 for caps in INLINE_CODE_ATTR.captures_iter(content) {
448 let m = caps.get(1).unwrap();
449 ranges.push(ByteRange {
450 start: m.start(),
451 end: m.end(),
452 });
453 }
454 ranges
455}
456
457static EXAMPLE_LIST_MARKER: LazyLock<Regex> =
472 LazyLock::new(|| Regex::new(r"(?m)^[ \t]*(\(@[A-Za-z0-9_-]*\))[ \t]+").unwrap());
473
474static EXAMPLE_REFERENCE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\(@[A-Za-z0-9_-]+\))").unwrap());
477
478pub fn detect_example_list_marker_ranges(content: &str) -> Vec<ByteRange> {
483 let mut ranges = Vec::new();
484 for caps in EXAMPLE_LIST_MARKER.captures_iter(content) {
485 let m = caps.get(1).unwrap();
486 ranges.push(ByteRange {
487 start: m.start(),
488 end: m.end(),
489 });
490 }
491 ranges
492}
493
494pub fn detect_example_reference_ranges(content: &str, marker_ranges: &[ByteRange]) -> Vec<ByteRange> {
501 let mut ranges = Vec::new();
502 let marker_starts: std::collections::HashSet<usize> = marker_ranges.iter().map(|r| r.start).collect();
503 for caps in EXAMPLE_REFERENCE.captures_iter(content) {
504 let m = caps.get(1).unwrap();
505 if !marker_starts.contains(&m.start()) {
506 ranges.push(ByteRange {
507 start: m.start(),
508 end: m.end(),
509 });
510 }
511 }
512 ranges
513}
514
515static BRACKETED_SPAN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[[^\]]+\]\{[^}]+\}").unwrap());
531
532pub fn detect_bracketed_span_ranges(content: &str) -> Vec<ByteRange> {
537 let mut ranges = Vec::new();
538 for m in BRACKETED_SPAN.find_iter(content) {
539 ranges.push(ByteRange {
540 start: m.start(),
541 end: m.end(),
542 });
543 }
544 ranges
545}
546
547pub fn detect_line_block_ranges(content: &str) -> Vec<ByteRange> {
569 let mut ranges = Vec::new();
570 let mut in_block = false;
571 let mut block_start = 0usize;
572 let mut block_end = 0usize;
573 let mut byte_offset = 0usize;
574
575 for line in content.split_inclusive('\n') {
576 let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
577 let is_line_block_line = trimmed.starts_with("| ") && !trimmed.trim_end().ends_with('|');
578 let is_continuation = in_block
579 && !trimmed.is_empty()
580 && trimmed.starts_with(|c: char| c.is_whitespace())
581 && !trimmed.trim_start().starts_with('|');
582
583 if is_line_block_line || is_continuation {
584 if !in_block {
585 block_start = byte_offset;
586 in_block = true;
587 }
588 block_end = byte_offset + line.len();
589 } else if in_block {
590 ranges.push(ByteRange {
591 start: block_start,
592 end: block_end,
593 });
594 in_block = false;
595 }
596 byte_offset += line.len();
597 }
598 if in_block {
599 ranges.push(ByteRange {
600 start: block_start,
601 end: block_end,
602 });
603 }
604 ranges
605}
606
607pub fn detect_pipe_table_caption_ranges(content: &str) -> Vec<ByteRange> {
628 let mut lines: Vec<&str> = Vec::new();
629 let mut line_offsets: Vec<usize> = Vec::new();
630 let mut offset = 0usize;
631 for line in content.split_inclusive('\n') {
632 line_offsets.push(offset);
633 lines.push(line);
634 offset += line.len();
635 }
636 line_offsets.push(offset);
637
638 fn line_body(line: &str) -> &str {
639 line.trim_end_matches('\n').trim_end_matches('\r')
640 }
641 fn is_pipe_table_row(line: &str) -> bool {
642 let t = line_body(line).trim();
643 t.starts_with('|') && t.ends_with('|') && t.len() >= 3
644 }
645 fn is_caption_line(line: &str) -> bool {
646 line_body(line).trim_start().starts_with(": ")
647 }
648 fn is_blank(line: &str) -> bool {
649 line_body(line).trim().is_empty()
650 }
651
652 let mut ranges = Vec::new();
653 for (i, line) in lines.iter().enumerate() {
654 if !is_caption_line(line) {
655 continue;
656 }
657 let table_below = i + 2 < lines.len() && is_blank(lines[i + 1]) && is_pipe_table_row(lines[i + 2]);
658 let table_above = i >= 2 && is_blank(lines[i - 1]) && is_pipe_table_row(lines[i - 2]);
659 if table_below || table_above {
660 ranges.push(ByteRange {
661 start: line_offsets[i],
662 end: line_offsets[i + 1],
663 });
664 }
665 }
666 ranges
667}
668
669pub fn detect_yaml_metadata_block_ranges(content: &str) -> Vec<ByteRange> {
686 let mut lines: Vec<&str> = Vec::new();
687 let mut line_offsets: Vec<usize> = Vec::new();
688 let mut offset = 0usize;
689 for line in content.split_inclusive('\n') {
690 line_offsets.push(offset);
691 lines.push(line);
692 offset += line.len();
693 }
694 line_offsets.push(offset);
695
696 fn line_body(line: &str) -> &str {
697 line.trim_end_matches('\n').trim_end_matches('\r')
698 }
699 fn is_blank(line: &str) -> bool {
700 line_body(line).trim().is_empty()
701 }
702 fn is_opener(line: &str) -> bool {
703 line_body(line).trim_end() == "---"
704 }
705 fn is_closer(line: &str) -> bool {
706 let t = line_body(line).trim_end();
707 t == "---" || t == "..."
708 }
709
710 let mut ranges = Vec::new();
711 let mut i = 0;
712 while i < lines.len() {
713 let preceded_by_blank = i == 0 || is_blank(lines[i - 1]);
714 if preceded_by_blank && is_opener(lines[i]) {
715 let mut j = i + 1;
716 let mut found_closer = false;
717 while j < lines.len() {
718 if is_closer(lines[j]) {
719 ranges.push(ByteRange {
720 start: line_offsets[i],
721 end: line_offsets[j + 1],
722 });
723 i = j + 1;
724 found_closer = true;
725 break;
726 }
727 j += 1;
728 }
729 if !found_closer {
730 i += 1;
732 }
733 } else {
734 i += 1;
735 }
736 }
737 ranges
738}
739
740static GRID_BORDER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\+(?:[-=]+\+)+\s*$").unwrap());
752
753static GRID_CONTENT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\|.*\|\s*$").unwrap());
755
756pub fn detect_grid_table_ranges(content: &str) -> Vec<ByteRange> {
764 let mut lines: Vec<&str> = Vec::new();
765 let mut line_offsets: Vec<usize> = Vec::new();
766 let mut offset = 0usize;
767 for line in content.split_inclusive('\n') {
768 line_offsets.push(offset);
769 lines.push(line);
770 offset += line.len();
771 }
772 line_offsets.push(offset);
773
774 fn line_body(line: &str) -> &str {
775 line.trim_end_matches('\n').trim_end_matches('\r')
776 }
777 fn is_border(line: &str) -> bool {
778 GRID_BORDER.is_match(line_body(line))
779 }
780 fn is_content(line: &str) -> bool {
781 GRID_CONTENT.is_match(line_body(line))
782 }
783
784 let mut ranges = Vec::new();
785 let mut i = 0;
786 while i < lines.len() {
787 if is_border(lines[i]) {
788 let start_line = i;
789 let mut j = i + 1;
790 let mut last_border = i;
791 let mut saw_content = false;
792 while j < lines.len() {
793 if is_border(lines[j]) {
794 last_border = j;
795 j += 1;
796 } else if is_content(lines[j]) {
797 saw_content = true;
798 j += 1;
799 } else {
800 break;
801 }
802 }
803 if saw_content && last_border > start_line {
806 ranges.push(ByteRange {
807 start: line_offsets[start_line],
808 end: line_offsets[last_border + 1],
809 });
810 i = last_border + 1;
811 continue;
812 }
813 }
814 i += 1;
815 }
816 ranges
817}
818
819static MULTI_LINE_UNDERLINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^-{2,}(?:\s+-{2,})+\s*$").unwrap());
832
833static MULTI_LINE_BORDER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^-{10,}\s*$").unwrap());
836
837pub fn detect_multi_line_table_ranges(content: &str) -> Vec<ByteRange> {
847 let mut lines: Vec<&str> = Vec::new();
848 let mut line_offsets: Vec<usize> = Vec::new();
849 let mut offset = 0usize;
850 for line in content.split_inclusive('\n') {
851 line_offsets.push(offset);
852 lines.push(line);
853 offset += line.len();
854 }
855 line_offsets.push(offset);
856
857 fn line_body(line: &str) -> &str {
858 line.trim_end_matches('\n').trim_end_matches('\r')
859 }
860 fn is_underline(line: &str) -> bool {
861 MULTI_LINE_UNDERLINE.is_match(line_body(line))
862 }
863 fn is_border(line: &str) -> bool {
864 MULTI_LINE_BORDER.is_match(line_body(line))
865 }
866
867 let mut ranges = Vec::new();
868 let mut i = 0;
869 while i < lines.len() {
870 if i >= 1 && is_underline(lines[i]) && !line_body(lines[i - 1]).is_empty() {
872 let mut header_start = i - 1;
876 while header_start > 0
877 && !line_body(lines[header_start - 1]).is_empty()
878 && !is_border(lines[header_start - 1])
879 && !is_underline(lines[header_start - 1])
880 {
881 header_start -= 1;
882 }
883
884 let start_line = if header_start > 0 && is_border(lines[header_start - 1]) {
886 header_start - 1
887 } else {
888 header_start
889 };
890
891 let mut j = i + 1;
893 let mut end_line: Option<usize> = None;
894 while j < lines.len() {
895 if is_border(lines[j]) {
896 end_line = Some(j);
898 break;
899 } else if j > i + 1 && is_underline(lines[j]) {
900 end_line = Some(j - 1);
903 break;
904 }
905 j += 1;
906 }
907
908 if let Some(end) = end_line {
909 ranges.push(ByteRange {
910 start: line_offsets[start_line],
911 end: line_offsets[end + 1],
912 });
913 i = end + 1;
914 continue;
915 }
916 }
918 i += 1;
919 }
920 ranges
921}
922
923pub fn detect_inline_footnote_ranges(content: &str) -> Vec<ByteRange> {
928 let mut ranges = Vec::new();
929 for caps in INLINE_FOOTNOTE_PATTERN.captures_iter(content) {
930 let m = caps.get(1).unwrap();
931 ranges.push(ByteRange {
932 start: m.start(),
933 end: m.end(),
934 });
935 }
936 ranges
937}
938
939pub fn find_citation_ranges(content: &str) -> Vec<ByteRange> {
948 let mut ranges = Vec::new();
949
950 let link_label_ranges: Vec<(usize, usize)> = LINK_LABEL_PATTERN
953 .captures_iter(content)
954 .filter_map(|c| c.get(1).map(|m| (m.start(), m.end())))
955 .collect();
956
957 let in_link_label = |pos: usize| -> bool { link_label_ranges.iter().any(|&(s, e)| pos >= s && pos < e) };
958
959 for mat in BRACKETED_CITATION_PATTERN.find_iter(content) {
961 if in_link_label(mat.start()) {
962 continue;
963 }
964 ranges.push(ByteRange {
965 start: mat.start(),
966 end: mat.end(),
967 });
968 }
969
970 for cap in INLINE_CITATION_PATTERN.captures_iter(content) {
972 if let Some(mat) = cap.get(1) {
973 let start = mat.start();
974 if in_link_label(start) {
975 continue;
976 }
977 if !ranges.iter().any(|r| start >= r.start && start < r.end) {
979 ranges.push(ByteRange { start, end: mat.end() });
980 }
981 }
982 }
983
984 ranges.sort_by_key(|r| r.start);
986 ranges
987}
988
989#[cfg(test)]
990mod tests {
991 use super::*;
992
993 #[test]
994 fn test_div_open_detection() {
995 assert!(is_div_open("::: {.callout-note}"));
997 assert!(is_div_open("::: {.callout-warning}"));
998 assert!(is_div_open("::: {#myid .class}"));
999 assert!(is_div_open("::: bordered"));
1000 assert!(is_div_open(" ::: {.note}")); assert!(is_div_open("::: {.callout-tip title=\"My Title\"}"));
1002
1003 assert!(!is_div_open(":::")); assert!(!is_div_open("::: ")); assert!(!is_div_open("Regular text"));
1007 assert!(!is_div_open("# Heading"));
1008 assert!(!is_div_open("```python")); }
1010
1011 #[test]
1012 fn test_div_close_detection() {
1013 assert!(is_div_close(":::"));
1014 assert!(is_div_close("::: "));
1015 assert!(is_div_close(" :::"));
1016 assert!(is_div_close(" ::: "));
1017
1018 assert!(!is_div_close("::: {.note}"));
1019 assert!(!is_div_close("::: class"));
1020 assert!(!is_div_close(":::note"));
1021 }
1022
1023 #[test]
1024 fn test_callout_detection() {
1025 assert!(is_callout_open("::: {.callout-note}"));
1026 assert!(is_callout_open("::: {.callout-warning}"));
1027 assert!(is_callout_open("::: {.callout-tip}"));
1028 assert!(is_callout_open("::: {.callout-important}"));
1029 assert!(is_callout_open("::: {.callout-caution}"));
1030 assert!(is_callout_open("::: {#myid .callout-note}"));
1031 assert!(is_callout_open("::: {.callout-note title=\"Title\"}"));
1032
1033 assert!(!is_callout_open("::: {.note}")); assert!(!is_callout_open("::: {.bordered}")); assert!(!is_callout_open("::: callout-note")); }
1037
1038 #[test]
1039 fn test_div_tracker() {
1040 let mut tracker = DivTracker::new();
1041
1042 assert!(tracker.process_line("::: {.callout-note}"));
1044 assert!(tracker.is_inside_div());
1045
1046 assert!(tracker.process_line("This is content."));
1048 assert!(tracker.is_inside_div());
1049
1050 assert!(!tracker.process_line(":::"));
1052 assert!(!tracker.is_inside_div());
1053 }
1054
1055 #[test]
1056 fn test_nested_divs() {
1057 let mut tracker = DivTracker::new();
1058
1059 assert!(tracker.process_line("::: {.outer}"));
1061 assert!(tracker.is_inside_div());
1062
1063 assert!(tracker.process_line(" ::: {.inner}"));
1065 assert!(tracker.is_inside_div());
1066
1067 assert!(tracker.process_line(" Content"));
1069 assert!(tracker.is_inside_div());
1070
1071 assert!(tracker.process_line(" :::"));
1073 assert!(tracker.is_inside_div());
1074
1075 assert!(!tracker.process_line(":::"));
1077 assert!(!tracker.is_inside_div());
1078 }
1079
1080 #[test]
1081 fn test_detect_div_block_ranges() {
1082 let content = r#"# Heading
1083
1084::: {.callout-note}
1085This is a note.
1086:::
1087
1088Regular text.
1089
1090::: {.bordered}
1091Content here.
1092:::
1093"#;
1094 let ranges = detect_div_block_ranges(content);
1095 assert_eq!(ranges.len(), 2);
1096
1097 let first_div_content = &content[ranges[0].start..ranges[0].end];
1099 assert!(first_div_content.contains("callout-note"));
1100 assert!(first_div_content.contains("This is a note"));
1101
1102 let second_div_content = &content[ranges[1].start..ranges[1].end];
1104 assert!(second_div_content.contains("bordered"));
1105 assert!(second_div_content.contains("Content here"));
1106 }
1107
1108 #[test]
1109 fn test_pandoc_attributes() {
1110 assert!(has_pandoc_attributes("# Heading {#custom-id}"));
1111 assert!(has_pandoc_attributes("# Heading {.unnumbered}"));
1112 assert!(has_pandoc_attributes("{#fig-1 width=\"50%\"}"));
1113 assert!(has_pandoc_attributes("{#id .class key=\"value\"}"));
1114
1115 assert!(!has_pandoc_attributes("# Heading"));
1116 assert!(!has_pandoc_attributes("Regular text"));
1117 assert!(!has_pandoc_attributes("{}"));
1118 }
1119
1120 #[test]
1121 fn test_div_with_title_attribute() {
1122 let content = r#"::: {.callout-note title="Important Note"}
1123This is the content of the note.
1124It can span multiple lines.
1125:::
1126"#;
1127 let ranges = detect_div_block_ranges(content);
1128 assert_eq!(ranges.len(), 1);
1129 assert!(is_callout_open("::: {.callout-note title=\"Important Note\"}"));
1130 }
1131
1132 #[test]
1133 fn test_unclosed_div() {
1134 let content = r#"::: {.callout-note}
1135This note is never closed.
1136"#;
1137 let ranges = detect_div_block_ranges(content);
1138 assert_eq!(ranges.len(), 1);
1139 assert_eq!(ranges[0].end, content.len());
1141 }
1142
1143 #[test]
1144 fn test_heading_inside_callout() {
1145 let content = r#"::: {.callout-warning}
1146## Warning Title
1147
1148Warning content here.
1149:::
1150"#;
1151 let ranges = detect_div_block_ranges(content);
1152 assert_eq!(ranges.len(), 1);
1153
1154 let div_content = &content[ranges[0].start..ranges[0].end];
1155 assert!(div_content.contains("## Warning Title"));
1156 }
1157
1158 #[test]
1160 fn test_has_citations() {
1161 assert!(has_citations("See @smith2020 for details."));
1162 assert!(has_citations("[@smith2020]"));
1163 assert!(has_citations("Multiple [@a; @b] citations"));
1164 assert!(!has_citations("No citations here"));
1165 assert!(has_citations("Email: user@example.com"));
1167 }
1168
1169 #[test]
1170 fn test_bracketed_citation_detection() {
1171 let content = "See [@smith2020] for more info.";
1172 let ranges = find_citation_ranges(content);
1173 assert_eq!(ranges.len(), 1);
1174 assert_eq!(&content[ranges[0].start..ranges[0].end], "[@smith2020]");
1175 }
1176
1177 #[test]
1178 fn test_inline_citation_detection() {
1179 let content = "As @smith2020 argues, this is true.";
1180 let ranges = find_citation_ranges(content);
1181 assert_eq!(ranges.len(), 1);
1182 assert_eq!(&content[ranges[0].start..ranges[0].end], "@smith2020");
1183 }
1184
1185 #[test]
1186 fn test_multiple_citations_in_brackets() {
1187 let content = "See [@smith2020; @jones2021] for details.";
1188 let ranges = find_citation_ranges(content);
1189 assert_eq!(ranges.len(), 1);
1190 assert_eq!(&content[ranges[0].start..ranges[0].end], "[@smith2020; @jones2021]");
1191 }
1192
1193 #[test]
1194 fn test_citation_with_prefix() {
1195 let content = "[see @smith2020, p. 10]";
1196 let ranges = find_citation_ranges(content);
1197 assert_eq!(ranges.len(), 1);
1198 assert_eq!(&content[ranges[0].start..ranges[0].end], "[see @smith2020, p. 10]");
1199 }
1200
1201 #[test]
1202 fn test_suppress_author_citation() {
1203 let content = "The theory [-@smith2020] states that...";
1204 let ranges = find_citation_ranges(content);
1205 assert_eq!(ranges.len(), 1);
1206 assert_eq!(&content[ranges[0].start..ranges[0].end], "[-@smith2020]");
1207 }
1208
1209 #[test]
1210 fn test_mixed_citations() {
1211 let content = "@smith2020 argues that [@jones2021] is wrong.";
1212 let ranges = find_citation_ranges(content);
1213 assert_eq!(ranges.len(), 2);
1214 assert_eq!(&content[ranges[0].start..ranges[0].end], "@smith2020");
1216 assert_eq!(&content[ranges[1].start..ranges[1].end], "[@jones2021]");
1218 }
1219
1220 #[test]
1221 fn test_email_not_confused_with_citation() {
1222 let content = "Contact user@example.com for help.";
1225 let ranges = find_citation_ranges(content);
1226 assert!(
1228 ranges.is_empty()
1229 || !ranges.iter().any(|r| {
1230 let s = &content[r.start..r.end];
1231 s.contains("example.com")
1232 })
1233 );
1234 }
1235
1236 #[test]
1241 fn test_bracketed_link_text_with_email_not_citation() {
1242 let content = "[contact user@example.com](#missing)";
1243 let ranges = find_citation_ranges(content);
1244 assert!(
1245 ranges.is_empty(),
1246 "Bracketed link text with embedded email must not be detected as a Pandoc citation: {ranges:?}"
1247 );
1248 }
1249
1250 #[test]
1252 fn test_bracketed_link_text_with_email_empty_href_not_citation() {
1253 let content = "[contact user@example.com]()";
1254 let ranges = find_citation_ranges(content);
1255 assert!(
1256 ranges.is_empty(),
1257 "Bracketed link text with embedded email and empty href must not be a Pandoc citation: {ranges:?}"
1258 );
1259 }
1260
1261 #[test]
1266 fn test_bracketed_text_followed_by_inline_link_not_citation() {
1267 let content = "[see @smith2020](#missing)";
1268 let ranges = find_citation_ranges(content);
1269 assert!(
1270 ranges.is_empty(),
1271 "Bracketed text followed by `(...)` is a link, not a citation: {ranges:?}"
1272 );
1273 }
1274
1275 #[test]
1277 fn test_bracketed_text_followed_by_empty_inline_link_not_citation() {
1278 let content = "[see @smith2020]()";
1279 let ranges = find_citation_ranges(content);
1280 assert!(
1281 ranges.is_empty(),
1282 "Bracketed text followed by `()` is a link with empty href, not a citation: {ranges:?}"
1283 );
1284 }
1285
1286 #[test]
1290 fn test_bracketed_text_followed_by_reference_link_not_citation() {
1291 let content = "[see @smith2020][ref]";
1292 let ranges = find_citation_ranges(content);
1293 assert!(
1294 ranges.is_empty(),
1295 "Bracketed text followed by `[ref]` is a reference link, not a citation: {ranges:?}"
1296 );
1297 }
1298
1299 #[test]
1302 fn test_standalone_bracketed_citation_still_detected() {
1303 let content = "See [see @smith2020] for details.";
1304 let ranges = find_citation_ranges(content);
1305 assert!(
1306 ranges.iter().any(|r| &content[r.start..r.end] == "[see @smith2020]"),
1307 "Standalone bracketed citation must still be detected: {ranges:?}"
1308 );
1309 }
1310
1311 #[test]
1313 fn test_bracketed_citation_followed_by_punctuation_still_detected() {
1314 let content = "Note [@smith2020].";
1315 let ranges = find_citation_ranges(content);
1316 assert!(
1317 ranges.iter().any(|r| &content[r.start..r.end] == "[@smith2020]"),
1318 "Bracketed citation followed by `.` must still be detected: {ranges:?}"
1319 );
1320 }
1321
1322 #[test]
1323 fn test_detect_inline_footnotes() {
1324 let content = "See ^[a quick note] here.\nAnd ^[another one] too.\n";
1325 let ranges = detect_inline_footnote_ranges(content);
1326 assert_eq!(ranges.len(), 2);
1327 let first_start = content.find("^[").unwrap();
1329 let first_end = content[first_start..].find(']').unwrap() + first_start + 1;
1330 assert_eq!(ranges[0].start, first_start);
1331 assert_eq!(ranges[0].end, first_end);
1332 let second_start = content[first_end..].find("^[").unwrap() + first_end;
1334 let second_end = content[second_start..].find(']').unwrap() + second_start + 1;
1335 assert_eq!(ranges[1].start, second_start);
1336 assert_eq!(ranges[1].end, second_end);
1337 }
1338
1339 #[test]
1340 fn test_inline_footnote_with_brackets_inside() {
1341 let content = "Note ^[ref to [other] thing] here.\n";
1346 let ranges = detect_inline_footnote_ranges(content);
1347 assert_eq!(ranges.len(), 1);
1348 }
1349
1350 #[test]
1351 fn test_inline_footnote_does_not_match_image_or_link() {
1352 let content = "An image  and a link [txt](url).\n";
1354 let ranges = detect_inline_footnote_ranges(content);
1355 assert_eq!(ranges.len(), 0);
1356 }
1357
1358 #[test]
1359 fn test_implicit_header_reference_slug() {
1360 assert_eq!(pandoc_header_slug("My Section"), "my-section");
1363 assert_eq!(pandoc_header_slug("API: v2!"), "api-v2");
1364 assert_eq!(pandoc_header_slug(" Trim Me "), "trim-me");
1365 assert_eq!(pandoc_header_slug("Multiple Spaces"), "multiple-spaces");
1366 }
1367
1368 #[test]
1369 fn test_collect_pandoc_header_slugs() {
1370 let content = "# My Section\n\n## Sub-section\n\nbody\n";
1371 let slugs = collect_pandoc_header_slugs(content);
1372 assert!(slugs.contains("my-section"));
1373 assert!(slugs.contains("sub-section"));
1374 }
1375
1376 #[test]
1377 fn test_collect_pandoc_header_slugs_strips_attribute_block() {
1378 let content = "# My Section {#custom-id .red}\n## Plain Section\n";
1379 let slugs = collect_pandoc_header_slugs(content);
1380 assert!(slugs.contains("my-section"));
1381 assert!(slugs.contains("plain-section"));
1382 assert!(!slugs.iter().any(|s| s.contains("custom-id")));
1384 }
1385
1386 #[test]
1387 fn test_collect_pandoc_header_slugs_preserves_body_braces() {
1388 let content = "# Some {curly} word in title\n";
1390 let slugs = collect_pandoc_header_slugs(content);
1391 assert!(slugs.contains("some-curly-word-in-title"));
1392 }
1393
1394 #[test]
1395 fn test_collect_pandoc_header_slugs_disambiguates_duplicates() {
1396 let content = "# A.\n\nbody\n\n# A.\n";
1400 let slugs = collect_pandoc_header_slugs(content);
1401 assert!(slugs.contains("a."), "first occurrence should expose base slug `a.`");
1402 assert!(
1403 slugs.contains("a.-1"),
1404 "second occurrence should expose `a.-1`: got {slugs:?}"
1405 );
1406 }
1407
1408 #[test]
1409 fn test_collect_pandoc_header_slugs_three_duplicates_get_two_suffixes() {
1410 let content = "# Intro\n\n# Intro\n\n# Intro\n";
1411 let slugs = collect_pandoc_header_slugs(content);
1412 assert!(slugs.contains("intro"));
1413 assert!(slugs.contains("intro-1"));
1414 assert!(slugs.contains("intro-2"));
1415 assert!(
1416 !slugs.contains("intro-3"),
1417 "three occurrences must produce only -1 and -2 suffixes, not -3: got {slugs:?}"
1418 );
1419 }
1420
1421 #[test]
1422 fn test_collect_pandoc_header_slugs_unique_headings_get_no_suffix() {
1423 let content = "# Foo\n\n# Bar\n\n# Baz\n";
1424 let slugs = collect_pandoc_header_slugs(content);
1425 assert!(slugs.contains("foo"));
1426 assert!(slugs.contains("bar"));
1427 assert!(slugs.contains("baz"));
1428 assert!(!slugs.contains("foo-1"));
1430 assert!(!slugs.contains("bar-1"));
1431 assert!(!slugs.contains("baz-1"));
1432 }
1433
1434 #[test]
1435 fn test_detect_example_list_markers() {
1436 let content = "(@) First item.\n(@good) Second item.\n(@) Third item.\n";
1437 let ranges = detect_example_list_marker_ranges(content);
1438 assert_eq!(ranges.len(), 3);
1439 assert_eq!(ranges[0].start, 0);
1440 assert_eq!(&content[ranges[0].start..ranges[0].end], "(@)");
1441 let second_start = content.find("(@good)").unwrap();
1442 assert_eq!(ranges[1].start, second_start);
1443 assert_eq!(&content[ranges[1].start..ranges[1].end], "(@good)");
1444 }
1445
1446 #[test]
1447 fn test_detect_example_references() {
1448 let content = "As shown in (@good), this works.\n";
1450 let marker_ranges = detect_example_list_marker_ranges(content);
1451 let ranges = detect_example_reference_ranges(content, &marker_ranges);
1452 assert_eq!(ranges.len(), 1);
1453 }
1454
1455 #[test]
1456 fn test_example_marker_must_be_at_line_start() {
1457 let content = "Inline (@) is not a marker.\n";
1458 let ranges = detect_example_list_marker_ranges(content);
1459 assert_eq!(ranges.len(), 0);
1460 }
1461
// A `~...~` run is reported once, and the range includes both tildes.
#[test]
fn test_detect_subscript() {
    let text = "H~2~O is water.\n";
    let found = detect_subscript_superscript_ranges(text);
    assert_eq!(found.len(), 1);

    let span = &found[0];
    assert_eq!(&text[span.start..span.end], "~2~");
}
1469
// A `^...^` run is reported once, and the range includes both carets.
#[test]
fn test_detect_superscript() {
    let text = "2^10^ is 1024.\n";
    let found = detect_subscript_superscript_ranges(text);
    assert_eq!(found.len(), 1);

    let span = &found[0];
    assert_eq!(&text[span.start..span.end], "^10^");
}
1477
// Doubled tildes (`~~struck~~`) must not produce any subscript range.
#[test]
fn test_subscript_does_not_match_strikethrough() {
    let text = "This is ~~struck~~.\n";
    let found = detect_subscript_superscript_ranges(text);
    assert_eq!(found.len(), 0);
}
1485
// The text between the two carets contains a space, so nothing may be reported.
#[test]
fn test_superscript_with_internal_space_is_not_matched() {
    let text = "x^a b^ y\n";
    let found = detect_subscript_superscript_ranges(text);
    assert_eq!(found.len(), 0);
}
1493
// Boundary case: the subscript is the very first thing in the input.
#[test]
fn test_subscript_at_start_of_input() {
    let text = "~x~ rest of line\n";
    let found = detect_subscript_superscript_ranges(text);
    assert_eq!(found.len(), 1);

    let span = &found[0];
    assert_eq!(&text[span.start..span.end], "~x~");
}
1502
// Boundary case: the superscript ends the input and no newline follows it.
#[test]
fn test_superscript_at_end_of_input_no_newline() {
    let text = "text ^x^";
    let found = detect_subscript_superscript_ranges(text);
    assert_eq!(found.len(), 1);

    let span = &found[0];
    assert_eq!(&text[span.start..span.end], "^x^");
}
1511
// A brace group right after an inline code span is reported; the range covers
// only the braces, not the code span in front of them.
#[test]
fn test_detect_inline_code_attribute() {
    let text = "Use `print()`{.python} for output.\n";
    let attrs = detect_inline_code_attr_ranges(text);
    assert_eq!(attrs.len(), 1);
    assert_eq!(&text[attrs[0].start..attrs[0].end], "{.python}");
}
1521
// A brace group that does not follow a backtick must not be reported.
#[test]
fn test_inline_code_attribute_only_after_backtick() {
    let text = "Use {.example} for the class.\n";
    let attrs = detect_inline_code_attr_ranges(text);
    assert_eq!(attrs.len(), 0);
}
1529
// Two attributed code spans on one line give two ranges, in document order.
#[test]
fn test_inline_code_attribute_multiple_on_one_line() {
    let text = "Use `a`{.x} and `b`{.y} here.\n";
    let attrs = detect_inline_code_attr_ranges(text);
    assert_eq!(attrs.len(), 2);

    // The length check above guarantees the zip visits both expectations.
    for (attr, expected) in attrs.iter().zip(["{.x}", "{.y}"]) {
        assert_eq!(&text[attr.start..attr.end], expected);
    }
}
1538
// A brace group holding a class, an id and a key=value pair is one single range.
#[test]
fn test_inline_code_attribute_compound_attributes() {
    let text = "Use `code`{.lang #id key=value} here.\n";
    let attrs = detect_inline_code_attr_ranges(text);
    assert_eq!(attrs.len(), 1);

    let attr = &attrs[0];
    assert_eq!(&text[attr.start..attr.end], "{.lang #id key=value}");
}
1547
// `[text]{attrs}` is reported as one range covering both brackets and braces.
#[test]
fn test_detect_bracketed_span() {
    let text = "This is [some text]{.smallcaps} here.\n";
    let spans = detect_bracketed_span_ranges(text);
    assert_eq!(spans.len(), 1);
    assert_eq!(&text[spans[0].start..spans[0].end], "[some text]{.smallcaps}");
}
1556
// An inline link `[text](url)` must not be reported as a bracketed span.
#[test]
fn test_bracketed_span_does_not_match_link() {
    let text = "A [link](http://example.com) here.\n";
    let spans = detect_bracketed_span_ranges(text);
    assert_eq!(spans.len(), 0);
}
1564
// Neither a reference link `[ref][def]` nor its `[def]: url` definition line
// may be reported as a bracketed span.
#[test]
fn test_bracketed_span_does_not_match_reference_link() {
    let text = "A [ref][def] here.\n[def]: http://example.com\n";
    let spans = detect_bracketed_span_ranges(text);
    assert_eq!(spans.len(), 0);
}
1572
// Two bracketed spans on the same line are both reported, in document order.
#[test]
fn test_bracketed_span_multiple_on_one_line() {
    let text = "[one]{.a} and [two]{.b} together.\n";
    let spans = detect_bracketed_span_ranges(text);
    assert_eq!(spans.len(), 2);

    // The length check above guarantees the zip visits both expectations.
    for (span, expected) in spans.iter().zip(["[one]{.a}", "[two]{.b}"]) {
        assert_eq!(&text[span.start..span.end], expected);
    }
}
1581
// One candidate has empty brackets (`[]{.x}`), the other empty braces (`[x]{}`);
// neither may be reported.
#[test]
fn test_bracketed_span_rejects_empty_content() {
    let text = "[]{.x} and [x]{} here.\n";
    let spans = detect_bracketed_span_ranges(text);
    assert_eq!(spans.len(), 0);
}
1589
// Boundary case: the span begins at byte offset 0.
#[test]
fn test_bracketed_span_at_start_of_line() {
    let text = "[head]{.intro} starts the line.\n";
    let spans = detect_bracketed_span_ranges(text);
    assert_eq!(spans.len(), 1);

    let span = &spans[0];
    assert_eq!(span.start, 0);
    assert_eq!(&text[span.start..span.end], "[head]{.intro}");
}
1598
// Two consecutive `| ` lines form one line block spanning the whole input,
// trailing newline included.
#[test]
fn test_detect_line_block_single() {
    let text = "| The Lord of the Rings\n| by J.R.R. Tolkien\n";
    let blocks = detect_line_block_ranges(text);
    assert_eq!(blocks.len(), 1);

    let block = &blocks[0];
    assert_eq!(block.start, 0);
    assert_eq!(block.end, text.len());
}
1607
// A single `| ` line without a final newline is still a block that ends at EOF.
#[test]
fn test_line_block_no_trailing_newline() {
    let text = "| Only line";
    let blocks = detect_line_block_ranges(text);
    assert_eq!(blocks.len(), 1);

    let block = &blocks[0];
    assert_eq!(block.start, 0);
    assert_eq!(block.end, text.len());
}
1617
// An indented line that itself starts with `|` must not extend the block:
// the single reported range has to stop right after the first line.
#[test]
fn test_line_block_indented_pipe_is_not_continuation() {
    let text = "| First\n | indented\n";
    let blocks = detect_line_block_ranges(text);
    assert_eq!(blocks.len(), 1);

    let first_line_len = "| First\n".len();
    assert_eq!(blocks[0].end, first_line_len);
}
1627
// An indented non-pipe line between two `| ` lines must not split the block:
// exactly one range is expected.
#[test]
fn test_line_block_continuation_with_indent() {
    let text = "| First line\n continuation\n| Second\n";
    let blocks = detect_line_block_ranges(text);
    assert_eq!(blocks.len(), 1);
}
1636
// A blank line between two `| ` lines yields two separate blocks.
#[test]
fn test_line_block_separated_by_blank() {
    let text = "| Block A\n\n| Block B\n";
    let blocks = detect_line_block_ranges(text);
    assert_eq!(blocks.len(), 2);
}
1643
// A pipe table (header row plus delimiter row) must not be reported as a line block.
#[test]
fn test_line_block_does_not_match_pipe_table() {
    let text = "| col1 | col2 |\n|------|------|\n";
    let blocks = detect_line_block_ranges(text);
    assert_eq!(blocks.len(), 0);
}
1651
// A `: ...` line one blank line below a pipe table is reported as its caption.
#[test]
fn test_detect_pipe_table_caption_below() {
    let text = "\
| col1 | col2 |
|------|------|
| a | b |

: My caption
";
    let captions = detect_pipe_table_caption_ranges(text);
    assert_eq!(captions.len(), 1);

    // Only the start of the range is pinned; where exactly it ends is left open.
    let caption = &captions[0];
    assert!(text[caption.start..caption.end].starts_with(": My caption"));
}
1666
// A `: ...` line one blank line above a pipe table is reported as well.
#[test]
fn test_detect_pipe_table_caption_above() {
    let text = "\
: Caption first

| col1 | col2 |
|------|------|
| a | b |
";
    let captions = detect_pipe_table_caption_ranges(text);
    assert_eq!(captions.len(), 1);
}
1679
// With no table nearby, a `: ...` line (definition-list shape) is not a caption.
#[test]
fn test_colon_line_without_adjacent_table_is_definition_term() {
    let text = "Term\n: definition\n";
    let captions = detect_pipe_table_caption_ranges(text);
    assert_eq!(captions.len(), 0);
}
1687
// Two blank lines between the table and the `: ...` line: no caption expected.
#[test]
fn test_pipe_table_caption_two_blank_lines_does_not_match() {
    let text = "\
| a | b |
|---|---|
| 1 | 2 |


: Caption
";
    let captions = detect_pipe_table_caption_ranges(text);
    assert_eq!(captions.len(), 0);
}
1702
// The `: ...` line directly follows the last table row, with no blank line
// in between: no caption expected.
#[test]
fn test_pipe_table_caption_no_blank_line_does_not_match() {
    let text = "\
| a | b |
|---|---|
| 1 | 2 |
: Caption
";
    let captions = detect_pipe_table_caption_ranges(text);
    assert_eq!(captions.len(), 0);
}
1715
// The caption is the final line and the input has no trailing newline:
// the range must end exactly at EOF and cover the caption text exactly.
#[test]
fn test_pipe_table_caption_no_trailing_newline() {
    let text = "\
| a | b |
|---|---|
| 1 | 2 |

: Trailing caption";
    let captions = detect_pipe_table_caption_ranges(text);
    assert_eq!(captions.len(), 1);

    let caption = &captions[0];
    assert_eq!(caption.end, text.len());
    assert_eq!(&text[caption.start..caption.end], ": Trailing caption");
}
1731
// Same layout as the caption-below case, but every line ends in `\r\n`.
#[test]
fn test_pipe_table_caption_handles_crlf() {
    let text = "| a | b |\r\n|---|---|\r\n| 1 | 2 |\r\n\r\n: CRLF caption\r\n";
    let captions = detect_pipe_table_caption_ranges(text);
    assert_eq!(captions.len(), 1);

    // Only the start of the range is pinned; whether the line ending is included is left open.
    let caption = &captions[0];
    assert!(text[caption.start..caption.end].starts_with(": CRLF caption"));
}
1741
// A line holding nothing but `:` is not a caption, even in caption position.
#[test]
fn test_pipe_table_caption_lone_colon_does_not_match() {
    let text = "\
| a | b |
|---|---|
| 1 | 2 |

:
";
    let captions = detect_pipe_table_caption_ranges(text);
    assert_eq!(captions.len(), 0);
}
1755
// A `---` ... `---` block at the top of the document is reported, starting at offset 0.
#[test]
fn test_detect_metadata_block_at_start() {
    let text = "---\ntitle: Doc\n---\n\nBody.\n";
    let blocks = detect_yaml_metadata_block_ranges(text);
    assert_eq!(blocks.len(), 1);

    let block = &blocks[0];
    assert_eq!(block.start, 0);
}
1764
// A second `---` ... `---` block later in the document (after a heading and a
// blank line) is reported in addition to the one at the top.
#[test]
fn test_detect_metadata_block_mid_document() {
    let text = "---\ntitle: Doc\n---\n\n# Heading\n\n---\nauthor: X\n---\n\nBody.\n";
    let blocks = detect_yaml_metadata_block_ranges(text);
    assert_eq!(blocks.len(), 2);
}
1772
// `...` is accepted as the closing line of a block opened with `---`.
#[test]
fn test_metadata_block_uses_dot_terminator() {
    let text = "---\ntitle: Doc\n...\n\nBody.\n";
    let blocks = detect_yaml_metadata_block_ranges(text);
    assert_eq!(blocks.len(), 1);
}
1780
// An opening `---` that is never closed must not produce a range.
#[test]
fn test_metadata_block_unterminated_opener_skipped() {
    let text = "---\ntitle: Doc\nbody continues forever\n";
    let blocks = detect_yaml_metadata_block_ranges(text);
    assert_eq!(blocks.len(), 0);
}
1788
// A `---` line sitting directly under a prose line does not open a metadata
// block, so nothing is reported even though a later `---` follows.
#[test]
fn test_metadata_block_dashes_after_text_are_not_opener() {
    let text = "Some prose paragraph.\n---\nbody: not-metadata\n---\n";
    let blocks = detect_yaml_metadata_block_ranges(text);
    assert_eq!(blocks.len(), 0);
}
1797
// The closing `---` is the last thing in the input (no newline after it);
// the block must still be found and span the whole input.
#[test]
fn test_metadata_block_no_trailing_newline() {
    let text = "---\ntitle: Doc\n---";
    let blocks = detect_yaml_metadata_block_ranges(text);
    assert_eq!(blocks.len(), 1);

    let block = &blocks[0];
    assert_eq!(block.start, 0);
    assert_eq!(block.end, text.len());
}
1808
// With CRLF line endings the reported range must run from the opening fence
// through the closing fence, including the closing fence's `\r\n`.
#[test]
fn test_metadata_block_handles_crlf() {
    let text = "---\r\ntitle: Doc\r\n---\r\n\r\nBody.\r\n";
    let blocks = detect_yaml_metadata_block_ranges(text);
    assert_eq!(blocks.len(), 1);

    let fence = "---\r\n";
    let covered = &text[blocks[0].start..blocks[0].end];
    assert!(covered.starts_with(fence));
    assert!(covered.ends_with(fence));
}
1819
// `#`-prefixed lines inside a fenced code block are not headings and must not
// contribute slugs; the real headings around the fence must.
#[test]
fn test_collect_pandoc_header_slugs_skips_code_blocks() {
    let doc = "\
# Real Heading

```bash
# This is a bash comment
#!/usr/bin/env bash
```

# Another Heading
";
    let slugs = collect_pandoc_header_slugs(doc);

    for heading_slug in ["real-heading", "another-heading"] {
        assert!(slugs.contains(heading_slug));
    }
    // The shell comment and the shebang line both live inside the fence.
    assert!(!slugs.contains("this-is-a-bash-comment"));
    assert!(slugs.iter().all(|slug| !slug.contains("usr-bin")));
}
1838
// A grid table with a `=` header separator is reported as one range covering
// the entire input.
#[test]
fn test_detect_simple_grid_table() {
    let text = "\
+---------+---------+
| col1 | col2 |
+=========+=========+
| a | b |
+---------+---------+
";
    let tables = detect_grid_table_ranges(text);
    assert_eq!(tables.len(), 1);

    let table = &tables[0];
    assert_eq!(table.start, 0);
    assert_eq!(table.end, text.len());
}
1853
// The reported range must contain the table borders but none of the
// paragraphs before or after the table.
#[test]
fn test_grid_table_with_surrounding_text() {
    let text = "\
Before.

+---+---+
| a | b |
+---+---+
| 1 | 2 |
+---+---+

After.
";
    let tables = detect_grid_table_ranges(text);
    assert_eq!(tables.len(), 1);

    let covered = &text[tables[0].start..tables[0].end];
    assert!(covered.contains("+---+---+"));
    for outside in ["Before", "After"] {
        assert!(!covered.contains(outside));
    }
}
1874
// A `+---+` fragment embedded in a sentence must not be reported as a table.
#[test]
fn test_lone_plus_dash_line_is_not_a_table() {
    let text = "Just a +---+ in prose.\n";
    let tables = detect_grid_table_ranges(text);
    assert_eq!(tables.len(), 0);
}
1881
// The final border is the last thing in the input (no newline after it);
// the table must still span from offset 0 to EOF.
#[test]
fn test_grid_table_no_trailing_newline() {
    let text = "+---+---+\n| a | b |\n+---+---+\n| 1 | 2 |\n+---+---+";
    let tables = detect_grid_table_ranges(text);
    assert_eq!(tables.len(), 1);

    let table = &tables[0];
    assert_eq!(table.start, 0);
    assert_eq!(table.end, text.len());
}
1892
// Same table with `\r\n` line endings: one range covering the whole input,
// final `\r\n` included.
#[test]
fn test_grid_table_crlf() {
    let text = "+---+---+\r\n| a | b |\r\n+---+---+\r\n| 1 | 2 |\r\n+---+---+\r\n";
    let tables = detect_grid_table_ranges(text);
    assert_eq!(tables.len(), 1);

    let table = &tables[0];
    assert_eq!(table.start, 0);
    assert_eq!(table.end, text.len());
}
1902
// Two border lines with no `|` content row between them are not a table.
#[test]
fn test_grid_table_borders_only_no_content_row_rejected() {
    let text = "+---+\n+---+\n";
    let tables = detect_grid_table_ranges(text);
    assert_eq!(tables.len(), 0);
}
1910
// Full multi-line table: a dashed top border, a two-line header, a line of
// per-column dashes, rows separated by a blank line, and a dashed bottom border.
// Exactly one range is expected, covering the whole input.
#[test]
fn test_detect_multi_line_table() {
    // NOTE(review): the fixture's column alignment depends on exact spacing — keep it verbatim.
    let content = "\
-------------------------------------------------------------
 Centered Default Right Left
 Header Aligned Aligned Aligned
----------- ------- --------------- -------------------------
 First row 12.0 Example of a row that
 spans multiple lines.

 Second row 5.0 Here's another one. Note
 the blank line between
 rows.
-------------------------------------------------------------
";
    let ranges = detect_multi_line_table_ranges(content);
    assert_eq!(ranges.len(), 1);
    // The range runs from the top border through the newline after the bottom border.
    assert_eq!(ranges[0].start, 0);
    assert_eq!(ranges[0].end, content.len());
}
1935
// A lone dashed line between two text lines must not be reported as a
// multi-line table.
#[test]
fn test_simple_dash_header_underline_only_does_not_match() {
    let text = "Some text\n--------\nMore text\n";
    let tables = detect_multi_line_table_ranges(text);
    assert_eq!(tables.len(), 0);
}
1944
// Same table as the basic multi-line case, but the bottom border is the last
// thing in the input (no newline after it). The range must still end at EOF.
#[test]
fn test_multi_line_table_no_trailing_newline() {
    // NOTE(review): the fixture's column alignment depends on exact spacing — keep it verbatim.
    let content = "\
-------------------------------------------------------------
 Centered Default Right Left
 Header Aligned Aligned Aligned
----------- ------- --------------- -------------------------
 First row 12.0 Example of a row that
 spans multiple lines.

 Second row 5.0 Here's another one. Note
 the blank line between
 rows.
-------------------------------------------------------------";
    let ranges = detect_multi_line_table_ranges(content);
    assert_eq!(ranges.len(), 1);
    // With no trailing newline, the end offset equals the input length.
    assert_eq!(ranges[0].end, content.len());
}
1964
// Multi-line table with CRLF line endings: one range covering the whole input.
#[test]
fn test_multi_line_table_crlf() {
    // Every source line ends in `\r\n\`: the explicit `\r\n` escape is the only
    // line break that reaches the string, and the trailing backslash joins the
    // source lines (the continuation also skips leading whitespace on the next
    // source line).
    let content = "\
-------------------------------------------------------------\r\n\
 Centered Default Right Left\r\n\
 Header Aligned Aligned Aligned\r\n\
----------- ------- --------------- -------------------------\r\n\
 First row 12.0 Example of a row that\r\n\
 spans multiple lines.\r\n\
\r\n\
 Second row 5.0 Here's another one. Note\r\n\
 the blank line between\r\n\
 rows.\r\n\
-------------------------------------------------------------\r\n";
    let ranges = detect_multi_line_table_ranges(content);
    assert_eq!(ranges.len(), 1);
    // The range spans the entire input, final `\r\n` included.
    assert_eq!(ranges[0].start, 0);
    assert_eq!(ranges[0].end, content.len());
}
1985
// Header lines and a per-column dash line are present, but no dashed line
// follows the rows. Such an unterminated candidate must yield no range.
#[test]
fn test_multi_line_table_unterminated_skipped() {
    let content = "\
 Centered Default
 Header Aligned
----------- -------
 First row
 Second row
";
    let ranges = detect_multi_line_table_ranges(content);
    // Nothing may be reported for this input.
    assert_eq!(ranges.len(), 0);
}
1999
// The fixture starts directly with the header row (no dashed line above it)
// and ends with a full-width dashed line. One range covering the whole input
// is expected.
#[test]
fn test_multi_line_table_no_top_border() {
    // NOTE(review): the fixture's column alignment depends on exact spacing — keep it verbatim.
    let content = "\
 Centered Default Right Left
----------- ------- --------------- -------------------------
 First row 12.0 Example
 Second row 5.0 Another
-------------------------------------------------------------
";
    let ranges = detect_multi_line_table_ranges(content);
    assert_eq!(ranges.len(), 1);
    // The range starts at the header row and ends after the closing border's newline.
    assert_eq!(ranges[0].start, 0);
    assert_eq!(ranges[0].end, content.len());
}
2016
// Table-driven check of `is_pandoc_raw_block_lang` against `{=format}` strings.
#[test]
fn test_is_pandoc_raw_block_lang() {
    // Accepted: `{=` + a name made of ASCII letters/digits, `-` or `_` + `}`.
    // Upper-case names are accepted too.
    let accepted = [
        "{=html}",
        "{=latex}",
        "{=docx}",
        "{=rst}",
        "{=open-document}",
        "{=my_format}",
        "{=HTML}",
    ];
    for lang in accepted {
        assert!(is_pandoc_raw_block_lang(lang));
    }

    // Rejected: no `=` after the brace, an empty or blank name, missing braces,
    // or whitespace anywhere inside the name.
    let rejected = ["{r}", "{python}", "{=}", "{= }", "=html", "{=html }", "{=ht ml}"];
    for lang in rejected {
        assert!(!is_pandoc_raw_block_lang(lang));
    }
}
2040
// Table-driven check of `is_pandoc_code_class_attr`: a braced attribute block
// counts only if it holds at least one `.class` token.
#[test]
fn test_is_pandoc_code_class_attr() {
    // Accepted: a class alone, several classes, or a class mixed with an id
    // and/or key=value pairs; `-` and `_` are allowed in class names.
    let accepted = [
        "{.python}",
        "{.haskell}",
        "{.rust}",
        "{.haskell .numberLines}",
        "{#myid .python}",
        "{.python startFrom=\"10\"}",
        "{#snippet .python startFrom=\"10\"}",
        "{.objective-c}",
        "{.my_lang}",
    ];
    for attr in accepted {
        assert!(is_pandoc_code_class_attr(attr));
    }

    // Rejected: empty braces, id only, key=value only, raw-format syntax,
    // bare names in braces, a lone dot, and anything without braces.
    let rejected = [
        "{}",
        "{#myid}",
        "{startFrom=\"10\"}",
        "{=html}",
        "{r}",
        "{python}",
        "{.}",
        ".python",
        "python",
    ];
    for attr in rejected {
        assert!(!is_pandoc_code_class_attr(attr));
    }
}
2074}