use regex::Regex;
use std::sync::LazyLock;
use crate::utils::skip_context::ByteRange;
/// Matches the opening fence of a fenced div: optional leading whitespace
/// (capture group 1), a run of three or more colons, optional whitespace, and
/// then either a non-empty `{...}` attribute block or a bare word.
///
/// Pandoc allows fences longer than three colons, so the colon run is
/// `:{3,}`. The bare-word alternative must not start with a colon; otherwise
/// a pure-colon line such as `::::` (a closing fence) would be read as `:::`
/// followed by the "word" `:`.
static DIV_OPEN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^(\s*):{3,}\s*(?:\{[^}]+\}|[^\s:]\S*)").expect("DIV_OPEN_PATTERN is a valid regex")
});

/// Matches a closing fence: optional leading whitespace (capture group 1), a
/// run of three or more colons, and nothing but whitespace up to the end.
static DIV_CLOSE_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*):{3,}\s*$").expect("DIV_CLOSE_PATTERN is a valid regex"));

/// Matches an opening fence whose `{...}` attribute block contains one of the
/// `.callout-note`, `.callout-warning`, `.callout-tip`, `.callout-important`
/// or `.callout-caution` classes. Accepts fences of three or more colons.
static CALLOUT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^(\s*):{3,}\s*\{[^}]*\.callout-(?:note|warning|tip|important|caution)[^}]*\}")
        .expect("CALLOUT_PATTERN is a valid regex")
});

/// Matches a brace-delimited attribute block (`{` + at least one non-`}`
/// character + `}`) anywhere in a line.
static PANDOC_ATTR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\{[^}]+\}").expect("PANDOC_ATTR_PATTERN is a valid regex"));
pub fn is_div_open(line: &str) -> bool {
DIV_OPEN_PATTERN.is_match(line)
}
pub fn is_div_close(line: &str) -> bool {
DIV_CLOSE_PATTERN.is_match(line)
}
pub fn is_callout_open(line: &str) -> bool {
CALLOUT_PATTERN.is_match(line)
}
pub fn has_pandoc_attributes(line: &str) -> bool {
PANDOC_ATTR_PATTERN.is_match(line)
}
/// Computes the indentation width of `line`.
///
/// Only the leading run of spaces and tabs is measured: each space counts as
/// one column and each tab as four. Counting stops at the first character
/// that is neither.
pub fn get_div_indent(line: &str) -> usize {
    line.chars()
        .map_while(|ch| match ch {
            ' ' => Some(1),
            '\t' => Some(4),
            _ => None,
        })
        .sum()
}
/// Line-by-line tracker for nested fenced divs.
///
/// Feed it lines with `process_line`; it keeps one entry per currently open
/// div, so the stack length is the nesting depth.
#[derive(Debug, Clone, Default)]
pub struct DivTracker {
// Indentation width of the opening fence of every div that is still open,
// outermost first.
indent_stack: Vec<usize>,
}
impl DivTracker {
    /// Creates a tracker with no open divs.
    pub fn new() -> Self {
        Self {
            indent_stack: Vec::new(),
        }
    }

    /// Feeds one line to the tracker and reports whether the tracker is
    /// inside a div after the line has been applied.
    ///
    /// Lines that do not start (after whitespace) with `:::` leave the state
    /// untouched. A closing fence pops the innermost open div only when that
    /// div's recorded indent is at least the fence's indent. An opening fence
    /// pushes its indent.
    pub fn process_line(&mut self, line: &str) -> bool {
        if line.trim_start().starts_with(":::") {
            let fence_indent = get_div_indent(line);
            if is_div_close(line) {
                let closes_innermost = self
                    .indent_stack
                    .last()
                    .is_some_and(|&open_indent| open_indent >= fence_indent);
                if closes_innermost {
                    self.indent_stack.pop();
                }
            } else if is_div_open(line) {
                self.indent_stack.push(fence_indent);
            }
        }
        self.depth() > 0
    }

    /// Returns `true` while at least one div is open.
    pub fn is_inside_div(&self) -> bool {
        self.depth() > 0
    }

    /// Returns the number of divs currently open (the nesting depth).
    pub fn depth(&self) -> usize {
        self.indent_stack.len()
    }
}
/// Scans `content` and returns the byte range of every top-level fenced div.
///
/// A range starts at the first byte of the line that opens an outermost div
/// and ends just after the last byte of the line that closes it (the line
/// terminator is excluded). A div that is never closed extends to
/// `content.len()`. Nested divs are part of their outermost div's range.
///
/// Offsets are accumulated from the real length of each line including its
/// terminator, so they stay correct for both `\n` and `\r\n` line endings.
pub fn detect_div_block_ranges(content: &str) -> Vec<ByteRange> {
    let mut ranges = Vec::new();
    let mut tracker = DivTracker::new();
    let mut div_start: Option<usize> = None;
    let mut byte_offset = 0;

    for raw_line in content.split_inclusive('\n') {
        // Strip the terminator the same way `str::lines` does: a trailing
        // `\n`, plus a `\r` only when it directly precedes that `\n`.
        let line = match raw_line.strip_suffix('\n') {
            Some(body) => body.strip_suffix('\r').unwrap_or(body),
            None => raw_line,
        };

        let was_inside = tracker.is_inside_div();
        let is_inside = tracker.process_line(line);

        if !was_inside && is_inside {
            div_start = Some(byte_offset);
        } else if was_inside && !is_inside {
            if let Some(start) = div_start.take() {
                ranges.push(ByteRange {
                    start,
                    end: byte_offset + line.len(),
                });
            }
        }

        // Advance by the true on-disk length, terminator included.
        byte_offset += raw_line.len();
    }

    // An unterminated div runs to the end of the document.
    if let Some(start) = div_start {
        ranges.push(ByteRange {
            start,
            end: content.len(),
        });
    }
    ranges
}
/// Returns `true` when `position` lies inside any of `ranges`, treating each
/// range as half-open (`start` inclusive, `end` exclusive).
pub fn is_within_div_block_ranges(ranges: &[ByteRange], position: usize) -> bool {
    for range in ranges {
        if range.start <= position && position < range.end {
            return true;
        }
    }
    false
}
pub fn extract_classes(line: &str) -> Vec<String> {
let mut classes = Vec::new();
if let Some(captures) = PANDOC_ATTR_PATTERN.find(line) {
let attr_block = captures.as_str();
let inner = attr_block.trim_start_matches('{').trim_end_matches('}').trim();
for part in inner.split_whitespace() {
if let Some(class) = part.strip_prefix('.') {
let class = class.split('=').next().unwrap_or(class);
if !class.is_empty() {
classes.push(class.to_string());
}
}
}
}
classes
}
pub fn extract_id(line: &str) -> Option<String> {
if let Some(captures) = PANDOC_ATTR_PATTERN.find(line) {
let attr_block = captures.as_str();
let inner = attr_block.trim_start_matches('{').trim_end_matches('}').trim();
for part in inner.split_whitespace() {
if let Some(id) = part.strip_prefix('#') {
let id = id.split('=').next().unwrap_or(id);
if !id.is_empty() {
return Some(id.to_string());
}
}
}
}
None
}
/// Matches a bracketed citation group: `[`, any run of non-`]` characters,
/// an `@` followed by a key (first character alphanumeric or `_`, then any of
/// the listed key characters), any further non-`]` characters, and `]`.
/// The whole bracket, including prefix/suffix text, is part of the match.
static BRACKETED_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\[[^\]]*@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*[^\]]*\]").unwrap()
});
/// Matches an inline `@key` citation. The `@` must sit at the start of the
/// text or directly after whitespace or one of `( [ { , ; :`; capture group 1
/// holds the `@key` itself, without that preceding character.
static INLINE_CITATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?:^|[\s\(\[\{,;:])(@[a-zA-Z0-9_][a-zA-Z0-9_:.#$%&\-+?<>~/]*)").unwrap()
});
/// Cheap pre-check: returns `true` when `text` contains an `@` character.
///
/// This is only a necessary condition for a citation; text such as an e-mail
/// address also passes.
#[inline]
pub fn has_citations(text: &str) -> bool {
    // '@' is ASCII, so a byte-level search is equivalent to a char search.
    text.as_bytes().contains(&b'@')
}
/// Collects the byte ranges of all citations in `content`, sorted by start.
///
/// Bracketed citations are gathered first and cover the whole bracket. Inline
/// `@key` citations are added afterwards, except those whose start already
/// falls inside a previously collected range.
pub fn find_citation_ranges(content: &str) -> Vec<ByteRange> {
    let mut found: Vec<ByteRange> = BRACKETED_CITATION_PATTERN
        .find_iter(content)
        .map(|bracket| ByteRange {
            start: bracket.start(),
            end: bracket.end(),
        })
        .collect();

    let inline_keys = INLINE_CITATION_PATTERN
        .captures_iter(content)
        .filter_map(|caps| caps.get(1));
    for key in inline_keys {
        let key_start = key.start();
        let already_covered = found
            .iter()
            .any(|range| range.start <= key_start && key_start < range.end);
        if already_covered {
            continue;
        }
        found.push(ByteRange {
            start: key_start,
            end: key.end(),
        });
    }

    found.sort_by_key(|range| range.start);
    found
}
/// Returns `true` when `position` falls inside any citation range, treating
/// each range as half-open (`start` inclusive, `end` exclusive).
pub fn is_in_citation(ranges: &[ByteRange], position: usize) -> bool {
    // "Inside some range" is the negation of "outside every range".
    !ranges
        .iter()
        .all(|range| position < range.start || position >= range.end)
}
/// Extracts the citation key from a single-citation string.
///
/// Recognised forms:
/// - `@key` returns everything after the `@`
/// - `[@key]` returns the text between `[@` and `]`
/// - `[-@key]` (suppress-author) returns the text between `[-@` and `]`
///
/// Returns `None` for anything else. The bracketed forms are not parsed
/// further, so any locator or additional citations inside the bracket are
/// returned as part of the key text.
pub fn extract_citation_key(citation: &str) -> Option<&str> {
    if let Some(key) = citation.strip_prefix('@') {
        return Some(key);
    }
    let inner = citation.strip_prefix('[')?.strip_suffix(']')?;
    // A single leading '-' marks a suppress-author citation.
    let inner = inner.strip_prefix('-').unwrap_or(inner);
    inner.strip_prefix('@')
}
// Unit tests for the fenced-div helpers and the citation helpers.
#[cfg(test)]
mod tests {
use super::*;
// Opening fences: attribute blocks, bare class words and indented fences are
// accepted; bare fences, plain text, headings and code fences are rejected.
#[test]
fn test_div_open_detection() {
assert!(is_div_open("::: {.callout-note}"));
assert!(is_div_open("::: {.callout-warning}"));
assert!(is_div_open("::: {#myid .class}"));
assert!(is_div_open("::: bordered"));
assert!(is_div_open(" ::: {.note}")); assert!(is_div_open("::: {.callout-tip title=\"My Title\"}"));
assert!(!is_div_open(":::")); assert!(!is_div_open("::: ")); assert!(!is_div_open("Regular text"));
assert!(!is_div_open("# Heading"));
assert!(!is_div_open("```python")); }
// Closing fences: only a bare fence with optional surrounding whitespace.
#[test]
fn test_div_close_detection() {
assert!(is_div_close(":::"));
assert!(is_div_close("::: "));
assert!(is_div_close(" :::"));
assert!(is_div_close(" ::: "));
assert!(!is_div_close("::: {.note}"));
assert!(!is_div_close("::: class"));
assert!(!is_div_close(":::note"));
}
// Callouts require a `.callout-*` class inside a braced attribute block.
#[test]
fn test_callout_detection() {
assert!(is_callout_open("::: {.callout-note}"));
assert!(is_callout_open("::: {.callout-warning}"));
assert!(is_callout_open("::: {.callout-tip}"));
assert!(is_callout_open("::: {.callout-important}"));
assert!(is_callout_open("::: {.callout-caution}"));
assert!(is_callout_open("::: {#myid .callout-note}"));
assert!(is_callout_open("::: {.callout-note title=\"Title\"}"));
assert!(!is_callout_open("::: {.note}")); assert!(!is_callout_open("::: {.bordered}")); assert!(!is_callout_open("::: callout-note")); }
// A single open/content/close sequence moves the depth 0 -> 1 -> 0.
#[test]
fn test_div_tracker() {
let mut tracker = DivTracker::new();
assert!(tracker.process_line("::: {.callout-note}"));
assert!(tracker.is_inside_div());
assert_eq!(tracker.depth(), 1);
assert!(tracker.process_line("This is content."));
assert!(tracker.is_inside_div());
assert!(!tracker.process_line(":::"));
assert!(!tracker.is_inside_div());
assert_eq!(tracker.depth(), 0);
}
// Nested divs: each opening fence adds a level, each closing fence removes one.
#[test]
fn test_nested_divs() {
let mut tracker = DivTracker::new();
assert!(tracker.process_line("::: {.outer}"));
assert_eq!(tracker.depth(), 1);
assert!(tracker.process_line(" ::: {.inner}"));
assert_eq!(tracker.depth(), 2);
assert!(tracker.process_line(" Content"));
assert!(tracker.is_inside_div());
assert!(tracker.process_line(" :::"));
assert_eq!(tracker.depth(), 1);
assert!(!tracker.process_line(":::"));
assert_eq!(tracker.depth(), 0);
}
// Two sibling divs produce two ranges, each covering its fence and content.
#[test]
fn test_detect_div_block_ranges() {
let content = r#"# Heading
::: {.callout-note}
This is a note.
:::
Regular text.
::: {.bordered}
Content here.
:::
"#;
let ranges = detect_div_block_ranges(content);
assert_eq!(ranges.len(), 2);
let first_div_content = &content[ranges[0].start..ranges[0].end];
assert!(first_div_content.contains("callout-note"));
assert!(first_div_content.contains("This is a note"));
let second_div_content = &content[ranges[1].start..ranges[1].end];
assert!(second_div_content.contains("bordered"));
assert!(second_div_content.contains("Content here"));
}
// Class extraction ignores ids and key=value attributes; a bare class word
// without braces yields nothing.
#[test]
fn test_extract_classes() {
assert_eq!(extract_classes("::: {.callout-note}"), vec!["callout-note"]);
assert_eq!(
extract_classes("::: {#myid .bordered .highlighted}"),
vec!["bordered", "highlighted"]
);
assert_eq!(
extract_classes("::: {.callout-warning title=\"Alert\"}"),
vec!["callout-warning"]
);
assert!(extract_classes("Regular text").is_empty());
assert!(extract_classes("::: classname").is_empty()); }
// Id extraction finds the `#id` token regardless of its position in the block.
#[test]
fn test_extract_id() {
assert_eq!(extract_id("::: {#myid}"), Some("myid".to_string()));
assert_eq!(extract_id("::: {#myid .class}"), Some("myid".to_string()));
assert_eq!(extract_id("::: {.class #custom-id}"), Some("custom-id".to_string()));
assert_eq!(extract_id("::: {.class}"), None);
assert_eq!(extract_id("Regular text"), None);
}
// Attribute blocks are detected anywhere in the line; an empty `{}` is not one.
#[test]
fn test_pandoc_attributes() {
assert!(has_pandoc_attributes("# Heading {#custom-id}"));
assert!(has_pandoc_attributes("# Heading {.unnumbered}"));
assert!(has_pandoc_attributes("{#fig-1 width=\"50%\"}"));
assert!(has_pandoc_attributes("{#id .class key=\"value\"}"));
assert!(!has_pandoc_attributes("# Heading"));
assert!(!has_pandoc_attributes("Regular text"));
assert!(!has_pandoc_attributes("{}"));
}
// A quoted title attribute does not prevent div or callout detection.
#[test]
fn test_div_with_title_attribute() {
let content = r#"::: {.callout-note title="Important Note"}
This is the content of the note.
It can span multiple lines.
:::
"#;
let ranges = detect_div_block_ranges(content);
assert_eq!(ranges.len(), 1);
assert!(is_callout_open("::: {.callout-note title=\"Important Note\"}"));
}
// A div without a closing fence extends to the end of the content.
#[test]
fn test_unclosed_div() {
let content = r#"::: {.callout-note}
This note is never closed.
"#;
let ranges = detect_div_block_ranges(content);
assert_eq!(ranges.len(), 1);
assert_eq!(ranges[0].end, content.len());
}
// A heading inside a callout is part of the callout's range.
#[test]
fn test_heading_inside_callout() {
let content = r#"::: {.callout-warning}
## Warning Title
Warning content here.
:::
"#;
let ranges = detect_div_block_ranges(content);
assert_eq!(ranges.len(), 1);
let div_content = &content[ranges[0].start..ranges[0].end];
assert!(div_content.contains("## Warning Title"));
}
// `has_citations` is only an `@` presence check, so e-mail addresses pass too.
#[test]
fn test_has_citations() {
assert!(has_citations("See @smith2020 for details."));
assert!(has_citations("[@smith2020]"));
assert!(has_citations("Multiple [@a; @b] citations"));
assert!(!has_citations("No citations here"));
assert!(has_citations("Email: user@example.com"));
}
// A bracketed citation yields one range covering the whole bracket.
#[test]
fn test_bracketed_citation_detection() {
let content = "See [@smith2020] for more info.";
let ranges = find_citation_ranges(content);
assert_eq!(ranges.len(), 1);
assert_eq!(&content[ranges[0].start..ranges[0].end], "[@smith2020]");
}
// An inline citation yields one range covering `@key` only.
#[test]
fn test_inline_citation_detection() {
let content = "As @smith2020 argues, this is true.";
let ranges = find_citation_ranges(content);
assert_eq!(ranges.len(), 1);
assert_eq!(&content[ranges[0].start..ranges[0].end], "@smith2020");
}
// Several keys inside one bracket are reported as a single range.
#[test]
fn test_multiple_citations_in_brackets() {
let content = "See [@smith2020; @jones2021] for details.";
let ranges = find_citation_ranges(content);
assert_eq!(ranges.len(), 1);
assert_eq!(&content[ranges[0].start..ranges[0].end], "[@smith2020; @jones2021]");
}
// Prefix and locator text inside the bracket belongs to the range.
#[test]
fn test_citation_with_prefix() {
let content = "[see @smith2020, p. 10]";
let ranges = find_citation_ranges(content);
assert_eq!(ranges.len(), 1);
assert_eq!(&content[ranges[0].start..ranges[0].end], "[see @smith2020, p. 10]");
}
// The suppress-author form `[-@key]` is detected as a bracketed citation.
#[test]
fn test_suppress_author_citation() {
let content = "The theory [-@smith2020] states that...";
let ranges = find_citation_ranges(content);
assert_eq!(ranges.len(), 1);
assert_eq!(&content[ranges[0].start..ranges[0].end], "[-@smith2020]");
}
// Inline and bracketed citations are both reported, ordered by position.
#[test]
fn test_mixed_citations() {
let content = "@smith2020 argues that [@jones2021] is wrong.";
let ranges = find_citation_ranges(content);
assert_eq!(ranges.len(), 2);
assert_eq!(&content[ranges[0].start..ranges[0].end], "@smith2020");
assert_eq!(&content[ranges[1].start..ranges[1].end], "[@jones2021]");
}
// Key extraction strips the leading `@`; non-citations yield `None`.
#[test]
fn test_citation_key_extraction() {
assert_eq!(extract_citation_key("@smith2020"), Some("smith2020"));
assert_eq!(extract_citation_key("@Smith_2020"), Some("Smith_2020"));
assert_eq!(extract_citation_key("@key:with:colons"), Some("key:with:colons"));
assert_eq!(extract_citation_key("not-a-citation"), None);
}
// Position 5 lies inside the bracketed citation; positions 0 and 17 lie outside.
#[test]
fn test_is_in_citation() {
let content = "See [@smith2020] here.";
let ranges = find_citation_ranges(content);
assert!(is_in_citation(&ranges, 5)); assert!(!is_in_citation(&ranges, 0)); assert!(!is_in_citation(&ranges, 17)); }
// An e-mail address must not produce a range that covers its domain part.
#[test]
fn test_email_not_confused_with_citation() {
let content = "Contact user@example.com for help.";
let ranges = find_citation_ranges(content);
assert!(
ranges.is_empty()
|| !ranges.iter().any(|r| {
let s = &content[r.start..r.end];
s.contains("example.com")
})
);
}
}