rumdl_lib/utils/
quarto_divs.rs1use regex::Regex;
17use std::sync::LazyLock;
18
19use crate::utils::skip_context::ByteRange;
20
21static DIV_OPEN_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*):::\s*(?:\{[^}]+\}|\S+)").unwrap());
25
26static DIV_CLOSE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*):::\s*$").unwrap());
29
30static CALLOUT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
33 Regex::new(r"^(\s*):::\s*\{[^}]*\.callout-(?:note|warning|tip|important|caution)[^}]*\}").unwrap()
34});
35
36static PANDOC_ATTR_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{[^}]+\}").unwrap());
40
41pub fn is_div_open(line: &str) -> bool {
43 DIV_OPEN_PATTERN.is_match(line)
44}
45
46pub fn is_div_close(line: &str) -> bool {
48 DIV_CLOSE_PATTERN.is_match(line)
49}
50
51pub fn is_callout_open(line: &str) -> bool {
53 CALLOUT_PATTERN.is_match(line)
54}
55
56pub fn has_pandoc_attributes(line: &str) -> bool {
58 PANDOC_ATTR_PATTERN.is_match(line)
59}
60
/// Returns the indentation width of `line`.
///
/// Only the leading run of spaces and tabs is measured: each space counts as
/// one column and each tab as four. Counting stops at the first character that
/// is neither a space nor a tab.
pub fn get_div_indent(line: &str) -> usize {
    line.chars()
        .map_while(|ch| match ch {
            ' ' => Some(1),
            '\t' => Some(4),
            _ => None,
        })
        .sum::<usize>()
}
73
74#[derive(Debug, Clone, Default)]
76pub struct DivTracker {
77 indent_stack: Vec<usize>,
79}
80
81impl DivTracker {
82 pub fn new() -> Self {
83 Self::default()
84 }
85
86 pub fn process_line(&mut self, line: &str) -> bool {
88 let trimmed = line.trim_start();
89
90 if trimmed.starts_with(":::") {
91 let indent = get_div_indent(line);
92
93 if is_div_close(line) {
94 if let Some(&top_indent) = self.indent_stack.last()
97 && top_indent >= indent
98 {
99 self.indent_stack.pop();
100 }
101 } else if is_div_open(line) {
102 self.indent_stack.push(indent);
104 }
105 }
106
107 !self.indent_stack.is_empty()
108 }
109
110 pub fn is_inside_div(&self) -> bool {
112 !self.indent_stack.is_empty()
113 }
114}
115
116pub fn detect_div_block_ranges(content: &str) -> Vec<ByteRange> {
119 let mut ranges = Vec::new();
120 let mut tracker = DivTracker::new();
121 let mut div_start: Option<usize> = None;
122 let mut byte_offset = 0;
123
124 for line in content.lines() {
125 let line_len = line.len();
126 let was_inside = tracker.is_inside_div();
127 let is_inside = tracker.process_line(line);
128
129 if !was_inside && is_inside {
131 div_start = Some(byte_offset);
132 }
133 else if was_inside
135 && !is_inside
136 && let Some(start) = div_start.take()
137 {
138 ranges.push(ByteRange {
140 start,
141 end: byte_offset + line_len,
142 });
143 }
144
145 byte_offset += line_len + 1;
147 }
148
149 if let Some(start) = div_start {
151 ranges.push(ByteRange {
152 start,
153 end: content.len(),
154 });
155 }
156
157 ranges
158}
159
160pub fn is_within_div_block_ranges(ranges: &[ByteRange], position: usize) -> bool {
162 ranges.iter().any(|r| position >= r.start && position < r.end)
163}
164
165static BRACKETED_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
182 Regex::new(r"\[[^\]]*@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*[^\]]*\]").unwrap()
184});
185
186static INLINE_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
190 Regex::new(r"(?:^|[\s\(\[\{,;:])(@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*)").unwrap()
192});
193
/// Cheap pre-check for citations: reports whether `text` contains an `@`.
///
/// This is only a necessary condition. Text such as an e-mail address also
/// contains `@` and makes this return `true` without holding a citation.
#[inline]
pub fn has_citations(text: &str) -> bool {
    // `@` is ASCII, so a byte-level search is equivalent to a char search.
    text.as_bytes().contains(&b'@')
}
199
200pub fn find_citation_ranges(content: &str) -> Vec<ByteRange> {
203 let mut ranges = Vec::new();
204
205 for mat in BRACKETED_CITATION_PATTERN.find_iter(content) {
207 ranges.push(ByteRange {
208 start: mat.start(),
209 end: mat.end(),
210 });
211 }
212
213 for cap in INLINE_CITATION_PATTERN.captures_iter(content) {
215 if let Some(mat) = cap.get(1) {
216 let start = mat.start();
217 if !ranges.iter().any(|r| start >= r.start && start < r.end) {
219 ranges.push(ByteRange { start, end: mat.end() });
220 }
221 }
222 }
223
224 ranges.sort_by_key(|r| r.start);
226 ranges
227}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the part of `content` covered by `range`.
    fn covered<'a>(content: &'a str, range: &ByteRange) -> &'a str {
        &content[range.start..range.end]
    }

    /// Returns the text of every citation found in `content`, in range order.
    fn cited_spans(content: &str) -> Vec<&str> {
        find_citation_ranges(content)
            .into_iter()
            .map(|range| &content[range.start..range.end])
            .collect()
    }

    /// Feeds each line to a fresh tracker and checks the state after every line.
    fn assert_tracker_states(steps: &[(&str, bool)]) {
        let mut tracker = DivTracker::new();
        for &(line, inside) in steps {
            assert_eq!(tracker.process_line(line), inside, "after line {line:?}");
            assert_eq!(tracker.is_inside_div(), inside, "after line {line:?}");
        }
    }

    #[test]
    fn test_div_open_detection() {
        let opening_fences = [
            "::: {.callout-note}",
            "::: {.callout-warning}",
            "::: {#myid .class}",
            "::: bordered",
            " ::: {.note}",
            "::: {.callout-tip title=\"My Title\"}",
        ];
        for line in opening_fences {
            assert!(is_div_open(line), "should open a div: {line:?}");
        }

        let other_lines = [":::", "::: ", "Regular text", "# Heading", "```python"];
        for line in other_lines {
            assert!(!is_div_open(line), "should not open a div: {line:?}");
        }
    }

    #[test]
    fn test_div_close_detection() {
        let closing_fences = [":::", "::: ", " :::", " ::: "];
        for line in closing_fences {
            assert!(is_div_close(line), "should close a div: {line:?}");
        }

        let other_lines = ["::: {.note}", "::: class", ":::note"];
        for line in other_lines {
            assert!(!is_div_close(line), "should not close a div: {line:?}");
        }
    }

    #[test]
    fn test_callout_detection() {
        let callout_fences = [
            "::: {.callout-note}",
            "::: {.callout-warning}",
            "::: {.callout-tip}",
            "::: {.callout-important}",
            "::: {.callout-caution}",
            "::: {#myid .callout-note}",
            "::: {.callout-note title=\"Title\"}",
        ];
        for line in callout_fences {
            assert!(is_callout_open(line), "should be a callout: {line:?}");
        }

        let other_lines = ["::: {.note}", "::: {.bordered}", "::: callout-note"];
        for line in other_lines {
            assert!(!is_callout_open(line), "should not be a callout: {line:?}");
        }
    }

    #[test]
    fn test_div_tracker() {
        assert_tracker_states(&[
            ("::: {.callout-note}", true),
            ("This is content.", true),
            (":::", false),
        ]);
    }

    #[test]
    fn test_nested_divs() {
        assert_tracker_states(&[
            ("::: {.outer}", true),
            (" ::: {.inner}", true),
            (" Content", true),
            // Closing the inner div leaves the outer one open.
            (" :::", true),
            (":::", false),
        ]);
    }

    #[test]
    fn test_detect_div_block_ranges() {
        let content = r#"# Heading

::: {.callout-note}
This is a note.
:::

Regular text.

::: {.bordered}
Content here.
:::
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 2);

        let first_div = covered(content, &ranges[0]);
        assert!(first_div.contains("callout-note"));
        assert!(first_div.contains("This is a note"));

        let second_div = covered(content, &ranges[1]);
        assert!(second_div.contains("bordered"));
        assert!(second_div.contains("Content here"));
    }

    #[test]
    fn test_pandoc_attributes() {
        let with_attributes = [
            "# Heading {#custom-id}",
            "# Heading {.unnumbered}",
            "{#fig-1 width=\"50%\"}",
            "{#id .class key=\"value\"}",
        ];
        for line in with_attributes {
            assert!(has_pandoc_attributes(line), "should have attributes: {line:?}");
        }

        let without_attributes = ["# Heading", "Regular text", "{}"];
        for line in without_attributes {
            assert!(!has_pandoc_attributes(line), "should have no attributes: {line:?}");
        }
    }

    #[test]
    fn test_div_with_title_attribute() {
        let content = r#"::: {.callout-note title="Important Note"}
This is the content of the note.
It can span multiple lines.
:::
"#;
        assert_eq!(detect_div_block_ranges(content).len(), 1);
        assert!(is_callout_open("::: {.callout-note title=\"Important Note\"}"));
    }

    #[test]
    fn test_unclosed_div() {
        let content = r#"::: {.callout-note}
This note is never closed.
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 1);
        // An unclosed div extends to the end of the document.
        assert_eq!(ranges[0].end, content.len());
    }

    #[test]
    fn test_heading_inside_callout() {
        let content = r#"::: {.callout-warning}
## Warning Title

Warning content here.
:::
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert!(covered(content, &ranges[0]).contains("## Warning Title"));
    }

    #[test]
    fn test_has_citations() {
        let with_at_sign = [
            "See @smith2020 for details.",
            "[@smith2020]",
            "Multiple [@a; @b] citations",
            // The pre-check only looks for `@`, so e-mail addresses pass too.
            "Email: user@example.com",
        ];
        for text in with_at_sign {
            assert!(has_citations(text), "should pass the pre-check: {text:?}");
        }

        assert!(!has_citations("No citations here"));
    }

    #[test]
    fn test_bracketed_citation_detection() {
        assert_eq!(cited_spans("See [@smith2020] for more info."), ["[@smith2020]"]);
    }

    #[test]
    fn test_inline_citation_detection() {
        assert_eq!(cited_spans("As @smith2020 argues, this is true."), ["@smith2020"]);
    }

    #[test]
    fn test_multiple_citations_in_brackets() {
        assert_eq!(
            cited_spans("See [@smith2020; @jones2021] for details."),
            ["[@smith2020; @jones2021]"]
        );
    }

    #[test]
    fn test_citation_with_prefix() {
        assert_eq!(cited_spans("[see @smith2020, p. 10]"), ["[see @smith2020, p. 10]"]);
    }

    #[test]
    fn test_suppress_author_citation() {
        assert_eq!(
            cited_spans("The theory [-@smith2020] states that..."),
            ["[-@smith2020]"]
        );
    }

    #[test]
    fn test_mixed_citations() {
        assert_eq!(
            cited_spans("@smith2020 argues that [@jones2021] is wrong."),
            ["@smith2020", "[@jones2021]"]
        );
    }

    #[test]
    fn test_email_not_confused_with_citation() {
        let content = "Contact user@example.com for help.";
        let ranges = find_citation_ranges(content);
        assert!(
            ranges
                .iter()
                .all(|range| !covered(content, range).contains("example.com"))
        );
    }
}