use once_cell::sync::Lazy;
use regex::Regex;
/// Compiled regular expressions for recognising citation markers, reference
/// sections, and runs of spaces or newlines.
pub(crate) struct Patterns {
/// Inline citation markers: numeric (`[1]`), caret footnotes (`[^note]`), and
/// prefixed forms such as `[source:1]`, `[ref:2]`, or `[fig:1]`.
pub inline_citations: Regex,
/// Lines that define a reference, e.g. `[1]: url`, `[^1]: text`, `[1] url`,
/// or a line consisting of a link such as `[1](https://...)`.
pub reference_definitions: Regex,
/// A heading of one to six `#` characters whose title is References, Citations,
/// Sources, Bibliography, or Notes (singular forms of the plural titles included).
pub reference_header: Regex,
/// A line that starts with a bracketed marker (`[id]` or `[^id]`) followed by
/// whitespace and some text.
pub reference_entry: Regex,
/// Two or more consecutive space characters (spaces only, despite the field name).
pub multiple_whitespace: Regex,
/// Three or more consecutive newline characters.
pub excessive_newlines: Regex,
}
/// Lazily compiled, process-wide set of citation and reference regexes.
///
/// Every pattern is a string literal, so compilation can only fail if the literal
/// itself is malformed. That is treated as a programming error and panics the first
/// time the static is accessed.
///
/// The line-anchored patterns (`reference_definitions`, `reference_entry`) match the
/// gap between a marker and its text with `[^\S\n]` (any whitespace except a line
/// feed) instead of `\s`. `\s` also matches `\n`, which would let a bare marker such
/// as `[1]:` or `[1]` swallow the line(s) that follow it.
pub(crate) static PATTERNS: Lazy<Patterns> = Lazy::new(|| Patterns {
    inline_citations: Regex::new(
        r"(?x)
        # Match any of these inline citation formats:
        (?:
            # Footnote-style with caret: [^identifier]
            # Matches: [^1], [^note], [^1_1], [^section_note], etc.
            \[\^[a-zA-Z0-9_\-]+\]
          |
            # Numeric citations: [1], [2], [123]
            \[\d+\]
          |
            # Named citations: [source:1], [ref:2], [cite:3], [note:4]
            \[(?:source|ref|cite|note|fig|table|eq):[a-zA-Z0-9_\-]+\]
        )
        ",
    )
    .expect("inline_citations pattern literal must be a valid regex"),
    reference_definitions: Regex::new(
        r"(?xm)
        ^                               # Start of line
        (?:
            # Footnote-style definitions: [^1]: or [^1_1]:
            \[\^[a-zA-Z0-9_\-]+\]:[^\S\n]*.*
          |
            # Standard numeric with colon: [1]: url
            \[\d+\]:[^\S\n]*.*
          |
            # Standard numeric with space: [1] url (less common)
            \[\d+\][^\S\n]+\S.*
          |
            # Markdown link format: [1](url) or [^1_1](url)
            # The target may not contain a line feed, so the link stays on one line.
            \[(?:\^)?[a-zA-Z0-9_\-]+\]\(https?://[^\)\n]+\)
        )
        $                               # End of line
        ",
    )
    .expect("reference_definitions pattern literal must be a valid regex"),
    reference_header: Regex::new(
        r"(?m)^#{1,6}\s*(?:References?|Citations?|Sources?|Bibliography|Notes?)\s*$",
    )
    .expect("reference_header pattern literal must be a valid regex"),
    reference_entry: Regex::new(
        r"(?xm)
        ^                               # Start of line
        \[(?:\^)?[a-zA-Z0-9_\-]+\]      # Citation marker (with optional ^)
        [^\S\n]+                        # Required whitespace, never a line feed
        [^\n]+                          # Rest of line (non-empty)
        $                               # End of line
        ",
    )
    .expect("reference_entry pattern literal must be a valid regex"),
    multiple_whitespace: Regex::new(r" {2,}")
        .expect("multiple_whitespace pattern literal must be a valid regex"),
    excessive_newlines: Regex::new(r"\n{3,}")
        .expect("excessive_newlines pattern literal must be a valid regex"),
});
impl Patterns {
pub fn get() -> &'static Patterns {
&PATTERNS
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain numeric markers match; alphabetic or empty brackets do not.
    #[test]
    fn test_inline_citations_numeric() {
        let re = &Patterns::get().inline_citations;
        for accepted in ["[1]", "[123]"] {
            assert!(re.is_match(accepted), "expected a match for {:?}", accepted);
        }
        for rejected in ["[abc]", "[]"] {
            assert!(!re.is_match(rejected), "expected no match for {:?}", rejected);
        }
    }

    /// Caret footnote markers with digits, words, underscores, and hyphens match.
    #[test]
    fn test_inline_citations_footnote_style() {
        let re = &Patterns::get().inline_citations;
        let accepted = [
            "[^1]",
            "[^note]",
            "[^1_1]",
            "[^1_23]",
            "[^2_1]",
            "[^section-1]",
            "[^note-main]",
        ];
        for sample in accepted {
            assert!(re.is_match(sample), "expected a match for {:?}", sample);
        }
    }

    /// Known prefixes match; an unknown prefix does not.
    #[test]
    fn test_inline_citations_named() {
        let re = &Patterns::get().inline_citations;
        for accepted in ["[source:1]", "[ref:2]", "[cite:3]", "[note:4]", "[fig:1]"] {
            assert!(re.is_match(accepted), "expected a match for {:?}", accepted);
        }
        assert!(
            !re.is_match("[other:1]"),
            "expected no match for {:?}",
            "[other:1]"
        );
    }

    /// Numeric definitions with a colon, with a space, and with a trailing title.
    #[test]
    fn test_reference_definitions_standard() {
        let re = &Patterns::get().reference_definitions;
        let accepted = [
            "[1]: https://example.com",
            "[2] https://example.com",
            "[123]: https://example.com \"Title\"",
        ];
        for sample in accepted {
            assert!(re.is_match(sample), "expected a match for {:?}", sample);
        }
    }

    /// Caret footnote definitions followed by text or a URL.
    #[test]
    fn test_reference_definitions_footnote() {
        let re = &Patterns::get().reference_definitions;
        let accepted = [
            "[^1]: Some text",
            "[^note]: Description",
            "[^1_1]: https://example.com",
            "[^1_23]: https://example.com/page",
            "[^2_1]: Some reference text",
        ];
        for sample in accepted {
            assert!(re.is_match(sample), "expected a match for {:?}", sample);
        }
    }

    /// Link-style definitions, with and without the caret.
    #[test]
    fn test_reference_definitions_markdown_link() {
        let re = &Patterns::get().reference_definitions;
        let accepted = [
            "[1](https://example.com)",
            "[123](https://example.com/page)",
            "[^1_1](https://example.com)",
            "[^2_5](https://example.com/page)",
        ];
        for sample in accepted {
            assert!(re.is_match(sample), "expected a match for {:?}", sample);
        }
    }

    /// Recognised section titles at several heading levels; other titles are rejected.
    #[test]
    fn test_reference_header_pattern() {
        let re = &Patterns::get().reference_header;
        let accepted = [
            "## References",
            "# Citations",
            "### Sources",
            "#### Bibliography",
        ];
        for sample in accepted {
            assert!(re.is_match(sample), "expected a match for {:?}", sample);
        }
        assert!(
            !re.is_match("## Other Section"),
            "expected no match for {:?}",
            "## Other Section"
        );
    }

    /// Bracketed marker, whitespace, then entry text.
    #[test]
    fn test_reference_entry_pattern() {
        let re = &Patterns::get().reference_entry;
        let accepted = [
            "[1] Author, A. (2024). Title.",
            "[^1] Some reference text",
            "[^1_1] Reference with underscore",
        ];
        for sample in accepted {
            assert!(re.is_match(sample), "expected a match for {:?}", sample);
        }
    }
}