use regex::Regex;
use std::sync::LazyLock;
use crate::utils::skip_context::ByteRange;
/// Opening div fence: optional leading whitespace (capture group 1), `:::`, optional
/// whitespace, then either a non-empty `{...}` group or a run of non-whitespace characters.
static DIV_OPEN_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*):::\s*(?:\{[^}]+\}|\S+)").unwrap());
/// Closing div fence: optional leading whitespace (capture group 1), `:::`, and nothing but
/// whitespace up to the end of the text.
static DIV_CLOSE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*):::\s*$").unwrap());
/// Callout opener: a `:::` fence followed by a `{...}` group that contains `.callout-`
/// immediately followed by one of `note`, `warning`, `tip`, `important` or `caution`.
static CALLOUT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^(\s*):::\s*\{[^}]*\.callout-(?:note|warning|tip|important|caution)[^}]*\}").unwrap()
});
/// Any `{...}` group with at least one character other than `}` between the braces.
/// The pattern is unanchored, so it matches anywhere in the searched text.
static PANDOC_ATTR_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{[^}]+\}").unwrap());
/// Reports whether `line` is an opening div fence.
///
/// A line qualifies when it matches `DIV_OPEN_PATTERN`: optional indentation, `:::`, and
/// then either a `{...}` attribute group or a bare word. A fence with nothing after the
/// colons does not qualify.
pub fn is_div_open(line: &str) -> bool {
    let opener = &*DIV_OPEN_PATTERN;
    opener.is_match(line)
}
/// Reports whether `line` is a closing div fence.
///
/// A line qualifies when it matches `DIV_CLOSE_PATTERN`: optional indentation, `:::`, and
/// only whitespace afterwards.
pub fn is_div_close(line: &str) -> bool {
    let closer = &*DIV_CLOSE_PATTERN;
    closer.is_match(line)
}
/// Reports whether `line` opens a callout div.
///
/// A line qualifies when it matches `CALLOUT_PATTERN`: a `:::` fence whose `{...}` group
/// contains one of the classes `.callout-note`, `.callout-warning`, `.callout-tip`,
/// `.callout-important` or `.callout-caution`. The class must be inside braces; a bare
/// word after the fence is not enough.
pub fn is_callout_open(line: &str) -> bool {
    let callout = &*CALLOUT_PATTERN;
    callout.is_match(line)
}
/// Reports whether `line` contains a `{...}` attribute group anywhere.
///
/// The group must hold at least one character between the braces, so an empty `{}` does
/// not count.
pub fn has_pandoc_attributes(line: &str) -> bool {
    let attrs = &*PANDOC_ATTR_PATTERN;
    attrs.is_match(line)
}
/// Measures the indentation width at the start of `line`.
///
/// Each leading space counts as 1 column and each leading tab as 4 columns. Counting stops
/// at the first character that is neither a space nor a tab.
pub fn get_div_indent(line: &str) -> usize {
    line.chars()
        .map_while(|ch| match ch {
            ' ' => Some(1_usize),
            '\t' => Some(4),
            _ => None,
        })
        .sum()
}
#[derive(Debug, Clone, Default)]
pub struct DivTracker {
indent_stack: Vec<usize>,
}
impl DivTracker {
pub fn new() -> Self {
Self::default()
}
pub fn process_line(&mut self, line: &str) -> bool {
let trimmed = line.trim_start();
if trimmed.starts_with(":::") {
let indent = get_div_indent(line);
if is_div_close(line) {
if let Some(&top_indent) = self.indent_stack.last()
&& top_indent >= indent
{
self.indent_stack.pop();
}
} else if is_div_open(line) {
self.indent_stack.push(indent);
}
}
!self.indent_stack.is_empty()
}
pub fn is_inside_div(&self) -> bool {
!self.indent_stack.is_empty()
}
}
/// Finds the byte ranges of all outermost fenced div blocks in `content`.
///
/// Each range starts at the first byte of the line that opens an outermost div and ends
/// right after the last character of the line that closes it (the line terminator is not
/// included). Nested divs are covered by the range of their outermost ancestor. A div that
/// is still open at the end of the input yields a range ending at `content.len()`.
///
/// Offsets are measured against the raw text, so they are exact for both `\n` and `\r\n`
/// line endings.
pub fn detect_div_block_ranges(content: &str) -> Vec<ByteRange> {
    let mut ranges = Vec::new();
    let mut tracker = DivTracker::new();
    let mut div_start: Option<usize> = None;
    let mut line_start = 0;

    // `str::lines` strips `\r\n` as well as `\n`, so assuming a one-byte terminator would
    // make the offsets drift on CRLF input. Walk the terminated pieces instead and advance
    // by their real length.
    for raw_line in content.split_inclusive('\n') {
        // Same trimming rule as `str::lines`: drop a trailing `\n`, then a `\r` before it.
        let line = raw_line
            .strip_suffix('\n')
            .map_or(raw_line, |rest| rest.strip_suffix('\r').unwrap_or(rest));

        let was_inside = tracker.is_inside_div();
        let is_inside = tracker.process_line(line);

        if !was_inside && is_inside {
            // First line of an outermost div.
            div_start = Some(line_start);
        } else if was_inside
            && !is_inside
            && let Some(start) = div_start.take()
        {
            // The outermost div was just closed by this line.
            ranges.push(ByteRange {
                start,
                end: line_start + line.len(),
            });
        }

        line_start += raw_line.len();
    }

    // An unclosed div runs to the end of the document.
    if let Some(start) = div_start {
        ranges.push(ByteRange {
            start,
            end: content.len(),
        });
    }

    ranges
}
/// Reports whether `position` falls inside any of `ranges`.
///
/// Each range is treated as half-open: `start` is included, `end` is excluded.
pub fn is_within_div_block_ranges(ranges: &[ByteRange], position: usize) -> bool {
    for range in ranges {
        if range.start <= position && position < range.end {
            return true;
        }
    }
    false
}
/// Bracketed citation: a `[...]` span with no `]` inside that contains an `@` immediately
/// followed by an ASCII letter, digit or underscore (the start of a citation key). Any
/// text before or after the key inside the brackets is part of the match.
static BRACKETED_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\[[^\]]*@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*[^\]]*\]").unwrap()
});
/// Inline citation: an `@key` that sits at the start of the searched text or directly after
/// whitespace or one of `(`, `[`, `{`, `,`, `;`, `:`. Capture group 1 holds the `@key`
/// itself, without the preceding character.
static INLINE_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?:^|[\s\(\[\{,;:])(@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*)").unwrap()
});
/// Cheap pre-check for citation syntax: reports whether `text` contains an `@` at all.
///
/// This returns `true` for any `@`, including ones that are not citations (for example in
/// an e-mail address).
#[inline]
pub fn has_citations(text: &str) -> bool {
    // `@` is ASCII, so it can never appear inside a multi-byte UTF-8 sequence; scanning
    // the raw bytes is therefore equivalent to searching for the character.
    text.bytes().any(|byte| byte == b'@')
}
/// Collects the byte ranges of all citations in `content`, sorted by start offset.
///
/// Bracketed citations are collected first, each as one range covering the whole bracket.
/// Inline `@key` citations are then added unless their start offset already lies inside a
/// previously collected range, so keys inside a bracketed citation are not reported twice.
pub fn find_citation_ranges(content: &str) -> Vec<ByteRange> {
    let mut found: Vec<ByteRange> = BRACKETED_CITATION_PATTERN
        .find_iter(content)
        .map(|bracketed| ByteRange {
            start: bracketed.start(),
            end: bracketed.end(),
        })
        .collect();

    for captures in INLINE_CITATION_PATTERN.captures_iter(content) {
        // Group 1 is the `@key` without the character that precedes it.
        let Some(key) = captures.get(1) else {
            continue;
        };
        let key_start = key.start();
        let already_covered = found
            .iter()
            .any(|range| range.start <= key_start && key_start < range.end);
        if !already_covered {
            found.push(ByteRange {
                start: key_start,
                end: key.end(),
            });
        }
    }

    found.sort_by_key(|range| range.start);
    found
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the text of every citation found in `content`, in the order reported by
    /// `find_citation_ranges`.
    fn cited_spans(content: &str) -> Vec<&str> {
        let ranges = find_citation_ranges(content);
        ranges.iter().map(|r| &content[r.start..r.end]).collect()
    }

    #[test]
    fn test_div_open_detection() {
        let openers = [
            "::: {.callout-note}",
            "::: {.callout-warning}",
            "::: {#myid .class}",
            "::: bordered",
            " ::: {.note}",
            "::: {.callout-tip title=\"My Title\"}",
        ];
        for line in openers {
            assert!(is_div_open(line), "should be an opening fence: {line:?}");
        }

        let non_openers = [":::", "::: ", "Regular text", "# Heading", "```python"];
        for line in non_openers {
            assert!(!is_div_open(line), "should not be an opening fence: {line:?}");
        }
    }

    #[test]
    fn test_div_close_detection() {
        let closers = [":::", "::: ", " :::", " ::: "];
        for line in closers {
            assert!(is_div_close(line), "should be a closing fence: {line:?}");
        }

        let non_closers = ["::: {.note}", "::: class", ":::note"];
        for line in non_closers {
            assert!(!is_div_close(line), "should not be a closing fence: {line:?}");
        }
    }

    #[test]
    fn test_callout_detection() {
        let callouts = [
            "::: {.callout-note}",
            "::: {.callout-warning}",
            "::: {.callout-tip}",
            "::: {.callout-important}",
            "::: {.callout-caution}",
            "::: {#myid .callout-note}",
            "::: {.callout-note title=\"Title\"}",
        ];
        for line in callouts {
            assert!(is_callout_open(line), "should be a callout opener: {line:?}");
        }

        let non_callouts = ["::: {.note}", "::: {.bordered}", "::: callout-note"];
        for line in non_callouts {
            assert!(!is_callout_open(line), "should not be a callout opener: {line:?}");
        }
    }

    #[test]
    fn test_div_tracker() {
        let mut tracker = DivTracker::new();
        let steps = [
            ("::: {.callout-note}", true),
            ("This is content.", true),
            (":::", false),
        ];
        for (line, inside_after) in steps {
            assert_eq!(tracker.process_line(line), inside_after, "after {line:?}");
            assert_eq!(tracker.is_inside_div(), inside_after, "after {line:?}");
        }
    }

    #[test]
    fn test_nested_divs() {
        let mut tracker = DivTracker::new();
        let steps = [
            ("::: {.outer}", true),
            (" ::: {.inner}", true),
            (" Content", true),
            (" :::", true),
            (":::", false),
        ];
        for (line, inside_after) in steps {
            assert_eq!(tracker.process_line(line), inside_after, "after {line:?}");
            assert_eq!(tracker.is_inside_div(), inside_after, "after {line:?}");
        }
    }

    #[test]
    fn test_detect_div_block_ranges() {
        let content = r#"# Heading
::: {.callout-note}
This is a note.
:::
Regular text.
::: {.bordered}
Content here.
:::
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 2);

        let first = &ranges[0];
        let first_div_content = &content[first.start..first.end];
        assert!(first_div_content.contains("callout-note"));
        assert!(first_div_content.contains("This is a note"));

        let second = &ranges[1];
        let second_div_content = &content[second.start..second.end];
        assert!(second_div_content.contains("bordered"));
        assert!(second_div_content.contains("Content here"));
    }

    #[test]
    fn test_pandoc_attributes() {
        let with_attributes = [
            "# Heading {#custom-id}",
            "# Heading {.unnumbered}",
            "{#fig-1 width=\"50%\"}",
            "{#id .class key=\"value\"}",
        ];
        for line in with_attributes {
            assert!(has_pandoc_attributes(line), "should have attributes: {line:?}");
        }

        let without_attributes = ["# Heading", "Regular text", "{}"];
        for line in without_attributes {
            assert!(!has_pandoc_attributes(line), "should not have attributes: {line:?}");
        }
    }

    #[test]
    fn test_div_with_title_attribute() {
        let content = r#"::: {.callout-note title="Important Note"}
This is the content of the note.
It can span multiple lines.
:::
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert!(is_callout_open("::: {.callout-note title=\"Important Note\"}"));
    }

    #[test]
    fn test_unclosed_div() {
        let content = r#"::: {.callout-note}
This note is never closed.
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].end, content.len());
    }

    #[test]
    fn test_heading_inside_callout() {
        let content = r#"::: {.callout-warning}
## Warning Title
Warning content here.
:::
"#;
        let ranges = detect_div_block_ranges(content);
        assert_eq!(ranges.len(), 1);

        let only = &ranges[0];
        let div_content = &content[only.start..only.end];
        assert!(div_content.contains("## Warning Title"));
    }

    #[test]
    fn test_has_citations() {
        // The last case is intentional: the pre-check reacts to any `@`, even in an e-mail.
        let cases = [
            ("See @smith2020 for details.", true),
            ("[@smith2020]", true),
            ("Multiple [@a; @b] citations", true),
            ("No citations here", false),
            ("Email: user@example.com", true),
        ];
        for (text, expected) in cases {
            assert_eq!(has_citations(text), expected, "for {text:?}");
        }
    }

    #[test]
    fn test_bracketed_citation_detection() {
        let spans = cited_spans("See [@smith2020] for more info.");
        assert_eq!(spans, ["[@smith2020]"]);
    }

    #[test]
    fn test_inline_citation_detection() {
        let spans = cited_spans("As @smith2020 argues, this is true.");
        assert_eq!(spans, ["@smith2020"]);
    }

    #[test]
    fn test_multiple_citations_in_brackets() {
        let spans = cited_spans("See [@smith2020; @jones2021] for details.");
        assert_eq!(spans, ["[@smith2020; @jones2021]"]);
    }

    #[test]
    fn test_citation_with_prefix() {
        let spans = cited_spans("[see @smith2020, p. 10]");
        assert_eq!(spans, ["[see @smith2020, p. 10]"]);
    }

    #[test]
    fn test_suppress_author_citation() {
        let spans = cited_spans("The theory [-@smith2020] states that...");
        assert_eq!(spans, ["[-@smith2020]"]);
    }

    #[test]
    fn test_mixed_citations() {
        let spans = cited_spans("@smith2020 argues that [@jones2021] is wrong.");
        assert_eq!(spans, ["@smith2020", "[@jones2021]"]);
    }

    #[test]
    fn test_email_not_confused_with_citation() {
        let spans = cited_spans("Contact user@example.com for help.");
        assert!(spans.iter().all(|span| !span.contains("example.com")));
    }
}