rumdl_lib/utils/
quarto_divs.rs1use regex::Regex;
17use std::sync::LazyLock;
18
19use crate::utils::skip_context::ByteRange;
20
21static DIV_OPEN_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*):::\s*(?:\{[^}]+\}|\S+)").unwrap());
25
26static DIV_CLOSE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*):::\s*$").unwrap());
29
30static CALLOUT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
33 Regex::new(r"^(\s*):::\s*\{[^}]*\.callout-(?:note|warning|tip|important|caution)[^}]*\}").unwrap()
34});
35
36static PANDOC_ATTR_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{[^}]+\}").unwrap());
40
41pub fn is_div_open(line: &str) -> bool {
43 DIV_OPEN_PATTERN.is_match(line)
44}
45
46pub fn is_div_close(line: &str) -> bool {
48 DIV_CLOSE_PATTERN.is_match(line)
49}
50
51pub fn is_callout_open(line: &str) -> bool {
53 CALLOUT_PATTERN.is_match(line)
54}
55
56pub fn has_pandoc_attributes(line: &str) -> bool {
58 PANDOC_ATTR_PATTERN.is_match(line)
59}
60
/// Computes the indentation width of `line`.
///
/// Leading spaces count as one column each and leading tabs as four; counting
/// stops at the first character that is neither a space nor a tab.
pub fn get_div_indent(line: &str) -> usize {
    line.chars()
        .map_while(|ch| match ch {
            ' ' => Some(1usize),
            '\t' => Some(4usize),
            _ => None,
        })
        .sum()
}
73
/// Tracks which divs are currently open while a document is scanned line by
/// line.
#[derive(Clone, Debug, Default)]
pub struct DivTracker {
    /// Indentation width of every div that is still open, outermost first.
    indent_stack: Vec<usize>,
}
80
81impl DivTracker {
82 pub fn new() -> Self {
83 Self::default()
84 }
85
86 pub fn process_line(&mut self, line: &str) -> bool {
88 let trimmed = line.trim_start();
89
90 if trimmed.starts_with(":::") {
91 let indent = get_div_indent(line);
92
93 if is_div_close(line) {
94 if let Some(&top_indent) = self.indent_stack.last()
97 && top_indent >= indent
98 {
99 self.indent_stack.pop();
100 }
101 } else if is_div_open(line) {
102 self.indent_stack.push(indent);
104 }
105 }
106
107 !self.indent_stack.is_empty()
108 }
109
110 pub fn is_inside_div(&self) -> bool {
112 !self.indent_stack.is_empty()
113 }
114
115 pub fn depth(&self) -> usize {
117 self.indent_stack.len()
118 }
119}
120
121pub fn detect_div_block_ranges(content: &str) -> Vec<ByteRange> {
124 let mut ranges = Vec::new();
125 let mut tracker = DivTracker::new();
126 let mut div_start: Option<usize> = None;
127 let mut byte_offset = 0;
128
129 for line in content.lines() {
130 let line_len = line.len();
131 let was_inside = tracker.is_inside_div();
132 let is_inside = tracker.process_line(line);
133
134 if !was_inside && is_inside {
136 div_start = Some(byte_offset);
137 }
138 else if was_inside
140 && !is_inside
141 && let Some(start) = div_start.take()
142 {
143 ranges.push(ByteRange {
145 start,
146 end: byte_offset + line_len,
147 });
148 }
149
150 byte_offset += line_len + 1;
152 }
153
154 if let Some(start) = div_start {
156 ranges.push(ByteRange {
157 start,
158 end: content.len(),
159 });
160 }
161
162 ranges
163}
164
165pub fn is_within_div_block_ranges(ranges: &[ByteRange], position: usize) -> bool {
167 ranges.iter().any(|r| position >= r.start && position < r.end)
168}
169
170pub fn extract_classes(line: &str) -> Vec<String> {
173 let mut classes = Vec::new();
174
175 if let Some(captures) = PANDOC_ATTR_PATTERN.find(line) {
177 let attr_block = captures.as_str();
178 let inner = attr_block.trim_start_matches('{').trim_end_matches('}').trim();
180
181 for part in inner.split_whitespace() {
183 if let Some(class) = part.strip_prefix('.') {
184 let class = class.split('=').next().unwrap_or(class);
186 if !class.is_empty() {
187 classes.push(class.to_string());
188 }
189 }
190 }
191 }
192
193 classes
194}
195
196pub fn extract_id(line: &str) -> Option<String> {
198 if let Some(captures) = PANDOC_ATTR_PATTERN.find(line) {
199 let attr_block = captures.as_str();
200 let inner = attr_block.trim_start_matches('{').trim_end_matches('}').trim();
202
203 for part in inner.split_whitespace() {
205 if let Some(id) = part.strip_prefix('#') {
206 let id = id.split('=').next().unwrap_or(id);
208 if !id.is_empty() {
209 return Some(id.to_string());
210 }
211 }
212 }
213 }
214 None
215}
216
217static BRACKETED_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
234 Regex::new(r"\[[^\]]*@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*[^\]]*\]").unwrap()
236});
237
238static INLINE_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
242 Regex::new(r"(?:^|[\s\(\[\{,;:])(@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*)").unwrap()
244});
245
/// Cheap pre-check: returns `true` when `text` contains an `@` at all.
///
/// This does not validate citation syntax, so e-mail addresses also make it
/// return `true`.
#[inline]
pub fn has_citations(text: &str) -> bool {
    // `@` is ASCII, so a byte-level search cannot hit the middle of a
    // multi-byte UTF-8 character.
    text.as_bytes().contains(&b'@')
}
251
252pub fn find_citation_ranges(content: &str) -> Vec<ByteRange> {
255 let mut ranges = Vec::new();
256
257 for mat in BRACKETED_CITATION_PATTERN.find_iter(content) {
259 ranges.push(ByteRange {
260 start: mat.start(),
261 end: mat.end(),
262 });
263 }
264
265 for cap in INLINE_CITATION_PATTERN.captures_iter(content) {
267 if let Some(mat) = cap.get(1) {
268 let start = mat.start();
269 if !ranges.iter().any(|r| start >= r.start && start < r.end) {
271 ranges.push(ByteRange { start, end: mat.end() });
272 }
273 }
274 }
275
276 ranges.sort_by_key(|r| r.start);
278 ranges
279}
280
281pub fn is_in_citation(ranges: &[ByteRange], position: usize) -> bool {
283 ranges.iter().any(|r| position >= r.start && position < r.end)
284}
285
/// Extracts the key from a citation string.
///
/// Two forms are recognised: `@key`, which yields everything after the `@`,
/// and the suppress-author form `[-@key]`, which yields the text between
/// `[-@` and the closing `]`. Any other input yields `None`.
pub fn extract_citation_key(citation: &str) -> Option<&str> {
    if let Some(key) = citation.strip_prefix('@') {
        return Some(key);
    }

    let after_marker = citation.strip_prefix("[-@")?;
    after_marker.strip_suffix(']')
}
293
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the part of `content` that `range` covers.
    fn covered<'a>(content: &'a str, range: &ByteRange) -> &'a str {
        &content[range.start..range.end]
    }

    #[test]
    fn test_div_open_detection() {
        let opening = [
            "::: {.callout-note}",
            "::: {.callout-warning}",
            "::: {#myid .class}",
            "::: bordered",
            " ::: {.note}",
            "::: {.callout-tip title=\"My Title\"}",
        ];
        for line in opening {
            assert!(is_div_open(line), "should open a div: {line:?}");
        }

        // Bare fences and ordinary Markdown are not openers.
        let not_opening = [":::", "::: ", "Regular text", "# Heading", "```python"];
        for line in not_opening {
            assert!(!is_div_open(line), "should not open a div: {line:?}");
        }
    }

    #[test]
    fn test_div_close_detection() {
        let closing = [":::", "::: ", " :::", " ::: "];
        for line in closing {
            assert!(is_div_close(line), "should close a div: {line:?}");
        }

        let not_closing = ["::: {.note}", "::: class", ":::note"];
        for line in not_closing {
            assert!(!is_div_close(line), "should not close a div: {line:?}");
        }
    }

    #[test]
    fn test_callout_detection() {
        let callouts = [
            "::: {.callout-note}",
            "::: {.callout-warning}",
            "::: {.callout-tip}",
            "::: {.callout-important}",
            "::: {.callout-caution}",
            "::: {#myid .callout-note}",
            "::: {.callout-note title=\"Title\"}",
        ];
        for line in callouts {
            assert!(is_callout_open(line), "should be a callout: {line:?}");
        }

        let not_callouts = ["::: {.note}", "::: {.bordered}", "::: callout-note"];
        for line in not_callouts {
            assert!(!is_callout_open(line), "should not be a callout: {line:?}");
        }
    }

    #[test]
    fn test_div_tracker() {
        let mut tracker = DivTracker::new();

        // Opening fence enters the div.
        let inside = tracker.process_line("::: {.callout-note}");
        assert!(inside);
        assert!(tracker.is_inside_div());
        assert_eq!(tracker.depth(), 1);

        // Plain content keeps the state.
        let inside = tracker.process_line("This is content.");
        assert!(inside);
        assert!(tracker.is_inside_div());

        // Closing fence leaves the div.
        let inside = tracker.process_line(":::");
        assert!(!inside);
        assert!(!tracker.is_inside_div());
        assert_eq!(tracker.depth(), 0);
    }

    #[test]
    fn test_nested_divs() {
        let mut tracker = DivTracker::new();

        let inside = tracker.process_line("::: {.outer}");
        assert!(inside);
        assert_eq!(tracker.depth(), 1);

        let inside = tracker.process_line(" ::: {.inner}");
        assert!(inside);
        assert_eq!(tracker.depth(), 2);

        let inside = tracker.process_line(" Content");
        assert!(inside);
        assert!(tracker.is_inside_div());

        let inside = tracker.process_line(" :::");
        assert!(inside);
        assert_eq!(tracker.depth(), 1);

        let inside = tracker.process_line(":::");
        assert!(!inside);
        assert_eq!(tracker.depth(), 0);
    }

    #[test]
    fn test_detect_div_block_ranges() {
        let content = r#"# Heading

::: {.callout-note}
This is a note.
:::

Regular text.

::: {.bordered}
Content here.
:::
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 2);

        let first_div = covered(content, &ranges[0]);
        assert!(first_div.contains("callout-note"));
        assert!(first_div.contains("This is a note"));

        let second_div = covered(content, &ranges[1]);
        assert!(second_div.contains("bordered"));
        assert!(second_div.contains("Content here"));
    }

    #[test]
    fn test_extract_classes() {
        let single = extract_classes("::: {.callout-note}");
        assert_eq!(single, vec!["callout-note"]);

        let with_id = extract_classes("::: {#myid .bordered .highlighted}");
        assert_eq!(with_id, vec!["bordered", "highlighted"]);

        let with_title = extract_classes("::: {.callout-warning title=\"Alert\"}");
        assert_eq!(with_title, vec!["callout-warning"]);

        // No attribute block means no classes.
        for line in ["Regular text", "::: classname"] {
            assert!(extract_classes(line).is_empty(), "unexpected classes in {line:?}");
        }
    }

    #[test]
    fn test_extract_id() {
        let with_ids = [
            ("::: {#myid}", "myid"),
            ("::: {#myid .class}", "myid"),
            ("::: {.class #custom-id}", "custom-id"),
        ];
        for (line, expected) in with_ids {
            assert_eq!(extract_id(line), Some(expected.to_string()));
        }

        for line in ["::: {.class}", "Regular text"] {
            assert_eq!(extract_id(line), None);
        }
    }

    #[test]
    fn test_pandoc_attributes() {
        let with_attributes = [
            "# Heading {#custom-id}",
            "# Heading {.unnumbered}",
            "{#fig-1 width=\"50%\"}",
            "{#id .class key=\"value\"}",
        ];
        for line in with_attributes {
            assert!(has_pandoc_attributes(line), "should have attributes: {line:?}");
        }

        let without_attributes = ["# Heading", "Regular text", "{}"];
        for line in without_attributes {
            assert!(!has_pandoc_attributes(line), "should not have attributes: {line:?}");
        }
    }

    #[test]
    fn test_div_with_title_attribute() {
        let content = r#"::: {.callout-note title="Important Note"}
This is the content of the note.
It can span multiple lines.
:::
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert!(is_callout_open("::: {.callout-note title=\"Important Note\"}"));
    }

    #[test]
    fn test_unclosed_div() {
        let content = r#"::: {.callout-note}
This note is never closed.
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 1);
        // An unclosed div runs to the end of the document.
        assert_eq!(ranges[0].end, content.len());
    }

    #[test]
    fn test_heading_inside_callout() {
        let content = r#"::: {.callout-warning}
## Warning Title

Warning content here.
:::
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 1);

        let div_content = covered(content, &ranges[0]);
        assert!(div_content.contains("## Warning Title"));
    }

    #[test]
    fn test_has_citations() {
        let with_at_sign = [
            "See @smith2020 for details.",
            "[@smith2020]",
            "Multiple [@a; @b] citations",
            // The quick check does not tell e-mail addresses apart.
            "Email: user@example.com",
        ];
        for text in with_at_sign {
            assert!(has_citations(text), "should contain '@': {text:?}");
        }

        assert!(!has_citations("No citations here"));
    }

    #[test]
    fn test_bracketed_citation_detection() {
        let content = "See [@smith2020] for more info.";
        let ranges = find_citation_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert_eq!(covered(content, &ranges[0]), "[@smith2020]");
    }

    #[test]
    fn test_inline_citation_detection() {
        let content = "As @smith2020 argues, this is true.";
        let ranges = find_citation_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert_eq!(covered(content, &ranges[0]), "@smith2020");
    }

    #[test]
    fn test_multiple_citations_in_brackets() {
        let content = "See [@smith2020; @jones2021] for details.";
        let ranges = find_citation_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert_eq!(covered(content, &ranges[0]), "[@smith2020; @jones2021]");
    }

    #[test]
    fn test_citation_with_prefix() {
        let content = "[see @smith2020, p. 10]";
        let ranges = find_citation_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert_eq!(covered(content, &ranges[0]), "[see @smith2020, p. 10]");
    }

    #[test]
    fn test_suppress_author_citation() {
        let content = "The theory [-@smith2020] states that...";
        let ranges = find_citation_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert_eq!(covered(content, &ranges[0]), "[-@smith2020]");
    }

    #[test]
    fn test_mixed_citations() {
        let content = "@smith2020 argues that [@jones2021] is wrong.";
        let ranges = find_citation_ranges(content);
        assert_eq!(ranges.len(), 2);
        assert_eq!(covered(content, &ranges[0]), "@smith2020");
        assert_eq!(covered(content, &ranges[1]), "[@jones2021]");
    }

    #[test]
    fn test_citation_key_extraction() {
        let keyed = [
            ("@smith2020", "smith2020"),
            ("@Smith_2020", "Smith_2020"),
            ("@key:with:colons", "key:with:colons"),
        ];
        for (citation, expected) in keyed {
            assert_eq!(extract_citation_key(citation), Some(expected));
        }

        assert_eq!(extract_citation_key("not-a-citation"), None);
    }

    #[test]
    fn test_is_in_citation() {
        let content = "See [@smith2020] here.";
        let ranges = find_citation_ranges(content);

        // Offset 5 is inside the brackets; 0 and 17 lie outside them.
        assert!(is_in_citation(&ranges, 5));
        assert!(!is_in_citation(&ranges, 0));
        assert!(!is_in_citation(&ranges, 17));
    }

    #[test]
    fn test_email_not_confused_with_citation() {
        let content = "Contact user@example.com for help.";
        let ranges = find_citation_ranges(content);

        // No detected range may cover the e-mail domain.
        let email_untouched = ranges
            .iter()
            .all(|range| !covered(content, range).contains("example.com"));
        assert!(email_untouched);
    }
}