rumdl_lib/utils/
mkdocs_abbreviations.rs1use regex::Regex;
31use std::sync::LazyLock;
32
33static ABBREVIATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\*\[([^\]]+)\]:\s*(.*)$").unwrap());
39
/// A single abbreviation definition of the form `*[ABBR]: Definition`.
///
/// Every field has a total equality, so the type derives `Eq` in addition to
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Abbreviation {
    /// The abbreviated term, i.e. the text between `*[` and `]`.
    pub abbr: String,
    /// The text following `]:`; empty when the definition is blank.
    pub definition: String,
    /// Line number supplied by whoever built the value.
    /// `extract_abbreviations` passes 1-based line numbers, while
    /// `is_defined_abbreviation` parses with a placeholder of `0`.
    pub line: usize,
}
50
51#[inline]
53pub fn is_abbreviation_definition(line: &str) -> bool {
54 if !line.trim_start().starts_with("*[") {
56 return false;
57 }
58 ABBREVIATION_PATTERN.is_match(line)
59}
60
/// Quick, regex-free heuristic: after skipping leading whitespace, does `line`
/// start with `*[` and contain `]:` somewhere after it?
///
/// This is a pre-filter only; a `true` result does not guarantee that the
/// line is a well-formed abbreviation definition.
#[inline]
pub fn might_be_abbreviation(line: &str) -> bool {
    // `]:` cannot overlap the two-character `*[` prefix, so searching only the
    // remainder is equivalent to searching the whole trimmed line.
    line.trim_start()
        .strip_prefix("*[")
        .is_some_and(|rest| rest.contains("]:"))
}
67
68pub fn parse_abbreviation(line: &str, line_num: usize) -> Option<Abbreviation> {
84 if let Some(caps) = ABBREVIATION_PATTERN.captures(line) {
85 let abbr = caps.get(1)?.as_str().to_string();
86 let definition = caps.get(2).map(|m| m.as_str().to_string()).unwrap_or_default();
87
88 Some(Abbreviation {
89 abbr,
90 definition,
91 line: line_num,
92 })
93 } else {
94 None
95 }
96}
97
98pub fn extract_abbreviations(content: &str) -> Vec<Abbreviation> {
103 let mut abbreviations = Vec::new();
104
105 for (line_idx, line) in content.lines().enumerate() {
106 if let Some(abbr) = parse_abbreviation(line, line_idx + 1) {
107 abbreviations.push(abbr);
108 }
109 }
110
111 abbreviations
112}
113
114pub fn is_in_abbreviation_definition(line: &str, position: usize) -> bool {
116 if is_abbreviation_definition(line) {
118 return position < line.len();
119 }
120 false
121}
122
123pub fn get_abbreviation_terms(content: &str) -> Vec<String> {
128 extract_abbreviations(content).into_iter().map(|a| a.abbr).collect()
129}
130
131pub fn is_defined_abbreviation(content: &str, word: &str) -> bool {
136 for line in content.lines() {
137 if let Some(abbr) = parse_abbreviation(line, 0)
138 && abbr.abbr == word
139 {
140 return true;
141 }
142 }
143 false
144}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected value for a successfully parsed definition.
    fn expected(abbr: &str, definition: &str, line: usize) -> Abbreviation {
        Abbreviation {
            abbr: abbr.to_string(),
            definition: definition.to_string(),
            line,
        }
    }

    #[test]
    fn test_is_abbreviation_definition() {
        // Well-formed definitions, including ones whose definition text is
        // missing or consists only of whitespace.
        let accepted = [
            "*[HTML]: Hypertext Markup Language",
            "*[CSS]: Cascading Style Sheets",
            "*[W3C]: World Wide Web Consortium",
            "*[CSS3]: CSS Level 3",
            "*[abbr]: definition",
            "*[HTML]:",
            "*[HTML]: ",
        ];
        for line in accepted {
            assert!(is_abbreviation_definition(line), "should be accepted: {line:?}");
        }

        // Lines that lack the `*[`, the closing `]`, or the colon.
        let rejected = [
            "# Heading",
            "Regular text",
            "[HTML]: Not an abbr",
            "*HTML: Not an abbr",
            "*[HTML] Not an abbr",
        ];
        for line in rejected {
            assert!(!is_abbreviation_definition(line), "should be rejected: {line:?}");
        }
    }

    #[test]
    fn test_parse_abbreviation() {
        assert_eq!(
            parse_abbreviation("*[HTML]: Hypertext Markup Language", 1),
            Some(expected("HTML", "Hypertext Markup Language", 1))
        );
        assert_eq!(
            parse_abbreviation("*[CSS3]: CSS Level 3", 5),
            Some(expected("CSS3", "CSS Level 3", 5))
        );

        assert_eq!(parse_abbreviation("Not an abbreviation", 1), None);
    }

    #[test]
    fn test_extract_abbreviations() {
        let content = r#"# Document

The HTML specification is maintained by the W3C.

CSS is used for styling.

*[HTML]: Hypertext Markup Language
*[W3C]: World Wide Web Consortium
*[CSS]: Cascading Style Sheets
"#;
        let abbreviations = extract_abbreviations(content);

        // Compare term/definition pairs in document order; this also pins the count.
        let found: Vec<(&str, &str)> = abbreviations
            .iter()
            .map(|entry| (entry.abbr.as_str(), entry.definition.as_str()))
            .collect();
        assert_eq!(
            found,
            [
                ("HTML", "Hypertext Markup Language"),
                ("W3C", "World Wide Web Consortium"),
                ("CSS", "Cascading Style Sheets"),
            ]
        );
    }

    #[test]
    fn test_is_defined_abbreviation() {
        let content = r#"Some text.

*[HTML]: Hypertext Markup Language
*[CSS]: Cascading Style Sheets
"#;
        for term in ["HTML", "CSS"] {
            assert!(is_defined_abbreviation(content, term), "should be defined: {term:?}");
        }

        // `W3C` is never defined, and the lookup is case-sensitive.
        for term in ["W3C", "html"] {
            assert!(!is_defined_abbreviation(content, term), "should not be defined: {term:?}");
        }
    }

    #[test]
    fn test_get_abbreviation_terms() {
        let content = r#"Text here.

*[HTML]: Hypertext Markup Language
*[CSS]: Cascading Style Sheets
*[W3C]: World Wide Web Consortium
"#;
        assert_eq!(get_abbreviation_terms(content), ["HTML", "CSS", "W3C"]);
    }

    #[test]
    fn test_might_be_abbreviation() {
        // Leading whitespace is tolerated by the quick check.
        for line in ["*[HTML]: Definition", " *[HTML]: Definition"] {
            assert!(might_be_abbreviation(line), "should pass the quick check: {line:?}");
        }

        for line in ["*HTML: Not abbr", "[HTML]: Not abbr", "Regular text"] {
            assert!(!might_be_abbreviation(line), "should fail the quick check: {line:?}");
        }
    }

    #[test]
    fn test_abbreviation_with_special_characters() {
        let cases = [
            ("*[C++]: C Plus Plus", "C++"),
            ("*[.NET]: Dot NET Framework", ".NET"),
        ];
        for (line, term) in cases {
            let parsed = parse_abbreviation(line, 1);
            assert_eq!(parsed.map(|entry| entry.abbr).as_deref(), Some(term));
        }
    }

    #[test]
    fn test_multi_word_definitions() {
        let parsed = parse_abbreviation("*[API]: Application Programming Interface", 1);
        assert_eq!(
            parsed.map(|entry| entry.definition).as_deref(),
            Some("Application Programming Interface")
        );
    }

    #[test]
    fn test_empty_definition() {
        let Some(parsed) = parse_abbreviation("*[HTML]:", 1) else {
            panic!("a definition with no text after the colon should still parse");
        };
        assert_eq!(parsed.abbr, "HTML");
        assert_eq!(parsed.definition, "");
    }
}