Skip to main content

eure_mark/
reference.rs

1//! Reference extraction from markdown content
2
3use regex::Regex;
4use std::sync::LazyLock;
5
6use crate::error::ReferenceType;
7
8/// A reference found in markdown content with position information
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub struct Reference {
11    /// Type of reference
12    pub ref_type: ReferenceType,
13    /// The key being referenced
14    pub key: String,
15    /// Byte offset of the reference start in the content
16    pub offset: u32,
17    /// Byte length of the entire reference string (e.g., "!cite[key]")
18    pub len: u32,
19}
20
21/// Regex patterns for extracting references
22static CITE_PATTERN: LazyLock<Regex> =
23    LazyLock::new(|| Regex::new(r"!cite\[([^\]]+)\]").expect("invalid cite regex"));
24
25static FOOTNOTE_PATTERN: LazyLock<Regex> =
26    LazyLock::new(|| Regex::new(r"!footnote\[([^\]]+)\]").expect("invalid footnote regex"));
27
28static REF_PATTERN: LazyLock<Regex> =
29    LazyLock::new(|| Regex::new(r"!ref\[([^\]]+)\]").expect("invalid ref regex"));
30
31/// Extract all references from markdown content with positions
32pub fn extract_references(content: &str) -> Vec<Reference> {
33    let mut refs = Vec::new();
34
35    // Extract !cite[key] references (can have multiple keys separated by comma)
36    for cap in CITE_PATTERN.captures_iter(content) {
37        let full_match = cap.get(0).unwrap();
38        let keys = &cap[1];
39
40        // For comma-separated keys, each key gets the same span (the whole !cite[...] match)
41        for key in keys.split(',') {
42            let trimmed = key.trim();
43            refs.push(Reference {
44                ref_type: ReferenceType::Cite,
45                key: trimmed.to_string(),
46                // For the whole !cite[...] match
47                offset: full_match.start() as u32,
48                len: full_match.len() as u32,
49            });
50        }
51    }
52
53    // Extract !footnote[key] references
54    for cap in FOOTNOTE_PATTERN.captures_iter(content) {
55        let full_match = cap.get(0).unwrap();
56        refs.push(Reference {
57            ref_type: ReferenceType::Footnote,
58            key: cap[1].to_string(),
59            offset: full_match.start() as u32,
60            len: full_match.len() as u32,
61        });
62    }
63
64    // Extract !ref[key] references
65    for cap in REF_PATTERN.captures_iter(content) {
66        let full_match = cap.get(0).unwrap();
67        refs.push(Reference {
68            ref_type: ReferenceType::Section,
69            key: cap[1].to_string(),
70            offset: full_match.start() as u32,
71            len: full_match.len() as u32,
72        });
73    }
74
75    refs
76}
77
/// Unit tests for [`extract_references`]: keys, reference types, byte spans and
/// the ordering of the returned entries.
#[cfg(test)]
mod tests {
    use super::*;

    /// A single citation yields one entry whose span covers the whole marker.
    #[test]
    fn test_extract_cite() {
        let refs = extract_references("See !cite[knuth1984] for details.");
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].ref_type, ReferenceType::Cite);
        assert_eq!(refs[0].key, "knuth1984");
        assert_eq!(refs[0].offset, 4); // "See " = 4 bytes
        assert_eq!(refs[0].len, 16); // "!cite[knuth1984]" = 16 bytes
    }

    /// Comma-separated keys are trimmed, split into separate entries, and all
    /// report the span of the one marker they came from.
    #[test]
    fn test_extract_multiple_cites() {
        let refs = extract_references("See !cite[knuth1984, lamport1994] for details.");
        assert_eq!(refs.len(), 2);
        assert_eq!(refs[0].key, "knuth1984");
        assert_eq!(refs[1].key, "lamport1994");
        for r in &refs {
            assert_eq!(r.ref_type, ReferenceType::Cite);
            assert_eq!(r.offset, 4); // "See " = 4 bytes
            assert_eq!(r.len, 29); // "!cite[knuth1984, lamport1994]" = 29 bytes
        }
    }

    /// A footnote marker is found even when it directly follows a word.
    #[test]
    fn test_extract_footnote() {
        let refs = extract_references("This is important!footnote[note1].");
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].ref_type, ReferenceType::Footnote);
        assert_eq!(refs[0].key, "note1");
        assert_eq!(refs[0].offset, 17); // "This is important" = 17 bytes
        assert_eq!(refs[0].len, 16); // "!footnote[note1]" = 16 bytes
    }

    /// A section reference maps to `ReferenceType::Section`.
    #[test]
    fn test_extract_ref() {
        let refs = extract_references("See !ref[intro] for more.");
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].ref_type, ReferenceType::Section);
        assert_eq!(refs[0].key, "intro");
        assert_eq!(refs[0].offset, 4); // "See " = 4 bytes
        assert_eq!(refs[0].len, 11); // "!ref[intro]" = 11 bytes
    }

    /// Mixed content returns citations, then footnotes, then section references,
    /// each with the span of its own marker.
    #[test]
    fn test_extract_mixed() {
        let content = "See !cite[knuth1984] and !footnote[note1]. Also !ref[intro].";
        let refs = extract_references(content);
        assert_eq!(refs.len(), 3);

        assert_eq!(refs[0].ref_type, ReferenceType::Cite);
        assert_eq!(refs[0].key, "knuth1984");
        assert_eq!((refs[0].offset, refs[0].len), (4, 16));

        assert_eq!(refs[1].ref_type, ReferenceType::Footnote);
        assert_eq!(refs[1].key, "note1");
        assert_eq!((refs[1].offset, refs[1].len), (25, 16));

        assert_eq!(refs[2].ref_type, ReferenceType::Section);
        assert_eq!(refs[2].key, "intro");
        assert_eq!((refs[2].offset, refs[2].len), (48, 11));
    }

    /// Content without any marker produces no entries.
    #[test]
    fn test_extract_none() {
        assert!(extract_references("").is_empty());
        assert!(extract_references("Plain text with [brackets] only.").is_empty());
    }

    /// Offsets count bytes, so multi-byte characters before a marker shift it
    /// by their encoded length rather than by one.
    #[test]
    fn test_offsets_are_bytes() {
        let refs = extract_references("é !ref[x]");
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].key, "x");
        assert_eq!(refs[0].offset, 3); // 'é' is 2 bytes in UTF-8, plus one space
        assert_eq!(refs[0].len, 7); // "!ref[x]" = 7 bytes
    }
}