use regex::Regex;
use std::sync::LazyLock;
/// Matches one complete attribute list, e.g. `{#id .class key="value"}`.
///
/// Accepted shape: `{`, an optional `:` (the `{: ... }` form), optional
/// whitespace, one or more items each followed by optional whitespace, then `}`.
/// An item is one of:
///
/// - `#name`: name characters are ASCII letters, digits, `_` and `-`; the
///   first character may be a digit but not `-`;
/// - `.name`: same characters, but the first must be a letter or `_`;
/// - `key="value"` / `key='value'`: the value may not contain either quote
///   character, and the pattern does not require the opening and closing
///   quote to be the same character.
///
/// Empty braces (`{}`, `{ }`) and braces holding free text do not match because
/// at least one item is required. The pattern is unanchored, so it can match
/// anywhere inside the searched text.
pub static ATTR_LIST_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"\{:?\s*(?:(?:#[a-zA-Z0-9_][a-zA-Z0-9_-]*|\.[a-zA-Z_][a-zA-Z0-9_-]*|[a-zA-Z_][a-zA-Z0-9_-]*=["'][^"']*["'])\s*)+\}"#).unwrap()
});
/// Finds a `#name` token; capture group 1 is the name without the `#`.
///
/// The name may start with a digit, letter or `_` (not `-`). The pattern is
/// unanchored and matches wherever a `#` followed by a valid name appears.
static CUSTOM_ID_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"#([a-zA-Z0-9_][a-zA-Z0-9_-]*)").unwrap());
/// Finds a `.name` token; capture group 1 is the name without the `.`.
///
/// Unlike IDs, the name must start with a letter or `_`, so `.3foo` is not a
/// class. The pattern is unanchored.
static CLASS_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\.([a-zA-Z_][a-zA-Z0-9_-]*)").unwrap());
/// Finds a quoted `key="value"` / `key='value'` pair.
///
/// Capture group 1 is the key (starts with a letter or `_`), group 2 is the
/// value without its surrounding quotes. The value may not contain `"` or `'`,
/// and the opening and closing quote characters are not required to be equal.
static KEY_VALUE_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"([a-zA-Z_][a-zA-Z0-9_-]*)=["']([^"']*)["']"#).unwrap());
/// Parsed contents of a single attribute list such as `{#id .class key="value"}`.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct AttrList {
    /// Custom ID without the leading `#`, if one was found.
    pub id: Option<String>,
    /// Class names without the leading `.`, in the order they were collected.
    pub classes: Vec<String>,
    /// `(key, value)` pairs in the order they were collected; values exclude the quotes.
    pub attributes: Vec<(String, String)>,
    /// Byte offset where the attribute list starts in the text it was found in.
    pub start: usize,
    /// Byte offset just past the end of the attribute list (exclusive).
    pub end: usize,
}

impl AttrList {
    /// Creates an attribute list with no ID, no classes, no attributes and
    /// both offsets set to zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `true` when a custom ID is present.
    #[inline]
    pub fn has_id(&self) -> bool {
        self.id.is_some()
    }

    /// Returns `true` when at least one class is present.
    #[inline]
    pub fn has_classes(&self) -> bool {
        !self.classes.is_empty()
    }

    /// Returns `true` when at least one key/value attribute is present.
    #[inline]
    pub fn has_attributes(&self) -> bool {
        !self.attributes.is_empty()
    }

    /// Returns `true` when there is no ID, no class and no key/value attribute.
    ///
    /// The `start`/`end` offsets are not taken into account.
    #[inline]
    pub fn is_empty(&self) -> bool {
        !(self.has_id() || self.has_classes() || self.has_attributes())
    }
}
/// Returns `true` when `line` contains at least one attribute list anywhere.
///
/// The `{` check is a cheap pre-filter so that lines without any brace never
/// reach the regex engine.
#[inline]
pub fn contains_attr_list(line: &str) -> bool {
    line.contains('{') && ATTR_LIST_PATTERN.is_match(line)
}
/// Returns `true` when `line`, ignoring surrounding whitespace, consists of
/// exactly one attribute list and nothing else (e.g. `{: .class-name }`).
///
/// The match must span the whole trimmed line. Merely containing an attribute
/// list between an opening `{` and a closing `}` is not enough, so inputs such
/// as `{#a} text {.b}` or `{ junk {#id}` are rejected.
#[inline]
pub fn is_standalone_attr_list(line: &str) -> bool {
    let trimmed = line.trim();
    // Cheap pre-filter before running the regex.
    if !trimmed.starts_with('{') || !trimmed.ends_with('}') {
        return false;
    }
    ATTR_LIST_PATTERN
        .find(trimmed)
        .is_some_and(|m| m.start() == 0 && m.end() == trimmed.len())
}
/// Returns `true` when `line` is an anchor line of the form `[](){ #id }`:
/// an empty link immediately followed (optionally after whitespace) by a
/// single attribute list, with nothing else on the line.
///
/// The attribute list must declare at least one ID or class; a list holding
/// only `key="value"` pairs does not count. Leading and trailing whitespace
/// around the whole line is ignored.
///
/// The text after `[]()` must be one attribute list from start to end, so
/// malformed input such as `[](){ junk {#id}` is rejected, and trailing text
/// after the closing brace disqualifies the line.
#[inline]
pub fn is_mkdocs_anchor_line(line: &str) -> bool {
    let Some(after_link) = line.trim().strip_prefix("[]()") else {
        return false;
    };
    let attr_text = after_link.trim_start();

    // Require the attribute list to cover everything after the empty link.
    let spans_whole = ATTR_LIST_PATTERN
        .find(attr_text)
        .is_some_and(|m| m.start() == 0 && m.end() == attr_text.len());
    if !spans_whole {
        return false;
    }

    find_attr_lists(attr_text)
        .iter()
        .any(|attrs| attrs.has_id() || attrs.has_classes())
}
/// Finds every attribute list in `line` and parses its contents.
///
/// Each returned [`AttrList`] carries the byte offsets of the match in `line`
/// (`start` inclusive, `end` exclusive), the first `#id` found, all `.class`
/// names and all `key="value"` pairs, each in order of appearance. Matches
/// that yield no ID, class or attribute are skipped.
///
/// IDs and classes are only recognised outside quoted attribute values, so
/// `{href="#top"}` produces no ID and `{src="img.png"}` produces no class.
pub fn find_attr_lists(line: &str) -> Vec<AttrList> {
    // Cheap pre-filter: no brace means no attribute list.
    if !line.contains('{') {
        return Vec::new();
    }

    let mut results = Vec::new();
    for m in ATTR_LIST_PATTERN.find_iter(line) {
        let attr_text = m.as_str();
        let mut attr_list = AttrList {
            start: m.start(),
            end: m.end(),
            ..Default::default()
        };

        // Collect key/value pairs first and build a copy of the text with each
        // pair replaced by a single space. The ID and class scans below then
        // cannot pick up `#` or `.` characters that live inside quoted values,
        // and the space keeps neighbouring tokens from merging.
        let mut bare = String::with_capacity(attr_text.len());
        let mut cursor = 0;
        for caps in KEY_VALUE_PATTERN.captures_iter(attr_text) {
            let (Some(whole), Some(key), Some(value)) = (caps.get(0), caps.get(1), caps.get(2))
            else {
                continue;
            };
            bare.push_str(&attr_text[cursor..whole.start()]);
            bare.push(' ');
            cursor = whole.end();
            attr_list
                .attributes
                .push((key.as_str().to_string(), value.as_str().to_string()));
        }
        bare.push_str(&attr_text[cursor..]);

        // Only the first ID is kept when several are present.
        attr_list.id = CUSTOM_ID_PATTERN
            .captures(&bare)
            .and_then(|caps| caps.get(1))
            .map(|id| id.as_str().to_string());

        attr_list.classes.extend(
            CLASS_PATTERN
                .captures_iter(&bare)
                .filter_map(|caps| caps.get(1))
                .map(|class| class.as_str().to_string()),
        );

        if !attr_list.is_empty() {
            results.push(attr_list);
        }
    }
    results
}
/// Returns the first custom ID declared by any attribute list in `line`,
/// without the leading `#`, or `None` when no attribute list carries an ID.
pub fn extract_heading_custom_id(line: &str) -> Option<String> {
    for attr_list in find_attr_lists(line) {
        if let Some(id) = attr_list.id {
            return Some(id);
        }
    }
    None
}
/// Removes a trailing attribute list from heading text.
///
/// If the last attribute list in `text` is followed only by whitespace, the
/// text before it is returned with trailing whitespace removed. Otherwise
/// `text` is returned unchanged; an attribute list in the middle of the text
/// is never stripped.
///
/// The last match is examined rather than the first, so a heading that
/// contains an earlier attribute-list-like span (`Title {#a} {#b}`) still has
/// its trailing list removed.
pub fn strip_attr_list_from_heading(text: &str) -> String {
    match ATTR_LIST_PATTERN.find_iter(text).last() {
        Some(m) if text[m.end()..].trim().is_empty() => text[..m.start()].trim_end().to_string(),
        _ => text.to_string(),
    }
}
/// Returns `true` when byte offset `position` falls inside any attribute list
/// found in `line`.
///
/// The opening `{` counts as inside; the offset just past the closing `}`
/// does not.
pub fn is_in_attr_list(line: &str, position: usize) -> bool {
    ATTR_LIST_PATTERN
        .find_iter(line)
        .any(|span| (span.start()..span.end()).contains(&position))
}
/// Collects every custom ID declared in `content`.
///
/// Returns `(id, line_number)` pairs in document order, where `line_number`
/// is 1-based. Attribute lists without an ID contribute nothing.
pub fn extract_all_custom_anchors(content: &str) -> Vec<(String, usize)> {
    content
        .lines()
        .enumerate()
        .flat_map(|(idx, text)| {
            find_attr_lists(text)
                .into_iter()
                .filter_map(move |attrs| attrs.id.map(|id| (id, idx + 1)))
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    // Detection of attribute lists anywhere in a line, including negatives
    // for plain braces and empty braces.
    #[test]
    fn test_contains_attr_list() {
        assert!(contains_attr_list("# Heading {#custom-id}"));
        assert!(contains_attr_list("# Heading {.my-class}"));
        assert!(contains_attr_list("# Heading {#id .class}"));
        assert!(contains_attr_list("Text {: #id}"));
        assert!(contains_attr_list("Link {target=\"_blank\"}"));
        assert!(!contains_attr_list("# Regular heading"));
        assert!(!contains_attr_list("Code with {braces}"));
        assert!(!contains_attr_list("Empty {}"));
        assert!(!contains_attr_list("Just text"));
    }

    #[test]
    fn test_find_attr_lists_basic() {
        let attrs = find_attr_lists("# Heading {#custom-id}");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].id, Some("custom-id".to_string()));
        assert!(attrs[0].classes.is_empty());
    }

    #[test]
    fn test_find_attr_lists_with_class() {
        let attrs = find_attr_lists("# Heading {.highlight}");
        assert_eq!(attrs.len(), 1);
        assert!(attrs[0].id.is_none());
        assert_eq!(attrs[0].classes, vec!["highlight"]);
    }

    // ID, several classes and a key/value pair in one list.
    #[test]
    fn test_find_attr_lists_complex() {
        let attrs = find_attr_lists("# Heading {#my-id .class1 .class2 data-value=\"test\"}");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].id, Some("my-id".to_string()));
        assert_eq!(attrs[0].classes, vec!["class1", "class2"]);
        assert_eq!(
            attrs[0].attributes,
            vec![("data-value".to_string(), "test".to_string())]
        );
    }

    // The `{: ... }` form with a leading colon.
    #[test]
    fn test_find_attr_lists_kramdown_style() {
        let attrs = find_attr_lists("Paragraph {: #para-id .special }");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].id, Some("para-id".to_string()));
        assert_eq!(attrs[0].classes, vec!["special"]);
    }

    #[test]
    fn test_extract_heading_custom_id() {
        assert_eq!(
            extract_heading_custom_id("# Heading {#my-anchor}"),
            Some("my-anchor".to_string())
        );
        assert_eq!(
            extract_heading_custom_id("## Title {#title .class}"),
            Some("title".to_string())
        );
        assert_eq!(extract_heading_custom_id("# No ID {.class-only}"), None);
        assert_eq!(extract_heading_custom_id("# Plain heading"), None);
    }

    #[test]
    fn test_strip_attr_list_from_heading() {
        assert_eq!(strip_attr_list_from_heading("Heading {#my-id}"), "Heading");
        assert_eq!(strip_attr_list_from_heading("Title {#id .class}"), "Title");
        assert_eq!(
            strip_attr_list_from_heading("Multi Word Title {#anchor}"),
            "Multi Word Title"
        );
        assert_eq!(strip_attr_list_from_heading("No attributes"), "No attributes");
        // An attribute list that is not at the end is left in place.
        assert_eq!(strip_attr_list_from_heading("Before {#id} after"), "Before {#id} after");
    }

    // `{#my-id}` occupies byte offsets 10..18 of the line.
    #[test]
    fn test_is_in_attr_list() {
        let line = "Some text {#my-id} more text";
        assert!(!is_in_attr_list(line, 0));
        assert!(!is_in_attr_list(line, 8));
        assert!(is_in_attr_list(line, 10));
        assert!(is_in_attr_list(line, 15));
        assert!(!is_in_attr_list(line, 19));
    }

    // The blank separator lines in the fixture are significant: the expected
    // 1-based line numbers below (1, 3, 5, 11) depend on them.
    #[test]
    fn test_extract_all_custom_anchors() {
        let content = r#"# First Heading {#first}

Some paragraph {: #para-id}

## Second {#second .class}

No ID here.

### Third {.class-only}

{#standalone-id}
"#;
        let anchors = extract_all_custom_anchors(content);
        assert_eq!(anchors.len(), 4);
        assert_eq!(anchors[0], ("first".to_string(), 1));
        assert_eq!(anchors[1], ("para-id".to_string(), 3));
        assert_eq!(anchors[2], ("second".to_string(), 5));
        assert_eq!(anchors[3], ("standalone-id".to_string(), 11));
    }

    #[test]
    fn test_multiple_attr_lists_same_line() {
        let attrs = find_attr_lists("[link]{#link-id} and [other]{#other-id}");
        assert_eq!(attrs.len(), 2);
        assert_eq!(attrs[0].id, Some("link-id".to_string()));
        assert_eq!(attrs[1].id, Some("other-id".to_string()));
    }

    // `start`/`end` are byte offsets usable for slicing the original line.
    #[test]
    fn test_attr_list_positions() {
        let line = "Text {#my-id} more";
        let attrs = find_attr_lists(line);
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].start, 5);
        assert_eq!(attrs[0].end, 13);
        assert_eq!(&line[attrs[0].start..attrs[0].end], "{#my-id}");
    }

    #[test]
    fn test_underscore_in_identifiers() {
        let attrs = find_attr_lists("# Heading {#my_custom_id .my_class}");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].id, Some("my_custom_id".to_string()));
        assert_eq!(attrs[0].classes, vec!["my_class"]);
    }

    #[test]
    fn test_is_standalone_attr_list() {
        assert!(is_standalone_attr_list("{ .class-name }"));
        assert!(is_standalone_attr_list("{: .class-name }"));
        assert!(is_standalone_attr_list("{#custom-id}"));
        assert!(is_standalone_attr_list("{: #custom-id .class }"));
        assert!(is_standalone_attr_list(" { .indented } "));
        // Attribute lists attached to other content are not standalone.
        assert!(!is_standalone_attr_list("Some text {#id}"));
        assert!(!is_standalone_attr_list("{#id} more text"));
        assert!(!is_standalone_attr_list("# Heading {#id}"));
        // Empty or invalid brace content.
        assert!(!is_standalone_attr_list("{ }"));
        assert!(!is_standalone_attr_list("{}"));
        assert!(!is_standalone_attr_list("{ random text }"));
        assert!(!is_standalone_attr_list(""));
        assert!(!is_standalone_attr_list(" "));
    }

    #[test]
    fn test_is_mkdocs_anchor_line_basic() {
        assert!(is_mkdocs_anchor_line("[](){ #example }"));
        assert!(is_mkdocs_anchor_line("[](){#example}"));
        assert!(is_mkdocs_anchor_line("[](){ #my-anchor }"));
        assert!(is_mkdocs_anchor_line("[](){ #anchor_with_underscore }"));
        assert!(is_mkdocs_anchor_line("[](){ .highlight }"));
        assert!(is_mkdocs_anchor_line("[](){.my-class}"));
        assert!(is_mkdocs_anchor_line("[](){ #anchor .class }"));
        assert!(is_mkdocs_anchor_line("[](){ .class #anchor }"));
        assert!(is_mkdocs_anchor_line("[](){ #id .class1 .class2 }"));
    }

    #[test]
    fn test_is_mkdocs_anchor_line_kramdown_style() {
        assert!(is_mkdocs_anchor_line("[](){: #anchor }"));
        assert!(is_mkdocs_anchor_line("[](){:#anchor}"));
        assert!(is_mkdocs_anchor_line("[](){: .class }"));
        assert!(is_mkdocs_anchor_line("[](){: #id .class }"));
    }

    #[test]
    fn test_is_mkdocs_anchor_line_whitespace_variations() {
        assert!(is_mkdocs_anchor_line(" [](){ #example }"));
        assert!(is_mkdocs_anchor_line("[](){ #example } "));
        assert!(is_mkdocs_anchor_line(" [](){ #example } "));
        assert!(is_mkdocs_anchor_line("\t[](){ #example }\t"));
        assert!(is_mkdocs_anchor_line("[]() { #example }"));
        assert!(is_mkdocs_anchor_line("[]()\t{ #example }"));
        assert!(is_mkdocs_anchor_line("[](){#example}"));
    }

    #[test]
    fn test_is_mkdocs_anchor_line_not_anchor_lines() {
        assert!(!is_mkdocs_anchor_line("[]()"));
        assert!(!is_mkdocs_anchor_line("[](){ }"));
        assert!(!is_mkdocs_anchor_line("[](){}"));
        assert!(!is_mkdocs_anchor_line("[](url)"));
        assert!(!is_mkdocs_anchor_line("[text](url)"));
        assert!(!is_mkdocs_anchor_line("[text](url){ #id }"));
        assert!(!is_mkdocs_anchor_line("[](){ #anchor } extra text"));
        assert!(!is_mkdocs_anchor_line("[](){ #anchor } <!-- comment -->"));
        assert!(!is_mkdocs_anchor_line("text [](){ #anchor }"));
        assert!(!is_mkdocs_anchor_line("# Heading [](){ #anchor }"));
        assert!(!is_mkdocs_anchor_line("# Heading"));
        assert!(!is_mkdocs_anchor_line("Some paragraph text"));
        assert!(!is_mkdocs_anchor_line("{ #standalone-attr }"));
        assert!(!is_mkdocs_anchor_line("[]{#anchor}"));
        assert!(!is_mkdocs_anchor_line("[](#anchor)"));
        assert!(!is_mkdocs_anchor_line("[](){ #anchor"));
    }

    #[test]
    fn test_is_mkdocs_anchor_line_edge_cases() {
        assert!(!is_mkdocs_anchor_line(""));
        assert!(!is_mkdocs_anchor_line(" "));
        assert!(!is_mkdocs_anchor_line("\t"));
        assert!(!is_mkdocs_anchor_line("{}"));
        assert!(!is_mkdocs_anchor_line("{ }"));
        assert!(is_mkdocs_anchor_line("[](){ #id data-value=\"test\" }"));
        assert!(is_mkdocs_anchor_line("[](){ #first #second }"));
    }

    #[test]
    fn test_is_mkdocs_anchor_line_real_world_examples() {
        assert!(is_mkdocs_anchor_line("[](){ #installation }"));
        assert!(is_mkdocs_anchor_line("[](){ #getting-started }"));
        assert!(is_mkdocs_anchor_line("[](){ #api-reference }"));
        assert!(is_mkdocs_anchor_line("[](){ .annotate }"));
        assert!(is_mkdocs_anchor_line("[](){ #note .warning }"));
    }

    // IDs, unlike classes, may start with a digit.
    #[test]
    fn test_attr_list_pattern_digit_starting_ids() {
        assert!(contains_attr_list("{#3rd-party}"));
        assert!(contains_attr_list("{ #3rd-party }"));
        assert!(contains_attr_list("{#1}"));
        assert!(contains_attr_list("{#123-foo}"));
        assert!(contains_attr_list("{#1st-section}"));
        assert!(contains_attr_list("{#2nd_item}"));
        assert!(contains_attr_list("{#3rd-party .glossary}"));
        assert!(contains_attr_list("{: #3rd-party}"));
    }

    #[test]
    fn test_custom_id_extraction_digit_starting() {
        let attrs = find_attr_lists("{#3rd-party}");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].id, Some("3rd-party".to_string()));

        let attrs = find_attr_lists("{#1}");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].id, Some("1".to_string()));

        let attrs = find_attr_lists("{#123-foo}");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].id, Some("123-foo".to_string()));

        let attrs = find_attr_lists("{#1st-section}");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].id, Some("1st-section".to_string()));

        let attrs = find_attr_lists("{#2nd_item}");
        assert_eq!(attrs.len(), 1);
        assert_eq!(attrs[0].id, Some("2nd_item".to_string()));
    }

    #[test]
    fn test_class_pattern_still_rejects_digit_starting() {
        let attrs = find_attr_lists("{.3invalid}");
        assert_eq!(attrs.len(), 0, "Digit-starting class names should not be matched");
    }

    #[test]
    fn test_mkdocs_anchor_line_digit_starting_id() {
        assert!(is_mkdocs_anchor_line("[](){ #3rd-party }"));
        assert!(is_mkdocs_anchor_line("[](){ #1 }"));
        assert!(is_mkdocs_anchor_line("[](){ #123-section }"));
    }
}