rumdl_lib/utils/
mkdocs_attr_list.rs1use regex::Regex;
35use std::sync::LazyLock;
36
37pub static ATTR_LIST_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
44 Regex::new(r#"\{:?\s*(?:(?:#[a-zA-Z0-9_][a-zA-Z0-9_-]*|\.[a-zA-Z_][a-zA-Z0-9_-]*|[a-zA-Z_][a-zA-Z0-9_-]*=["'][^"']*["'])\s*)+\}"#).unwrap()
47});
48
49static CUSTOM_ID_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"#([a-zA-Z0-9_][a-zA-Z0-9_-]*)").unwrap());
51
52static CLASS_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\.([a-zA-Z_][a-zA-Z0-9_-]*)").unwrap());
54
55static KEY_VALUE_PATTERN: LazyLock<Regex> =
57 LazyLock::new(|| Regex::new(r#"([a-zA-Z_][a-zA-Z0-9_-]*)=["']([^"']*)["']"#).unwrap());
58
/// Parsed contents of a single attr_list block found in a line.
///
/// `Eq` is derived alongside `PartialEq` because every field has total
/// equality; this lets the type be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AttrList {
    /// Custom ID without the leading `#`, if the block declares one.
    pub id: Option<String>,
    /// Class names without the leading `.`, in the order they appear.
    pub classes: Vec<String>,
    /// Key-value attributes as `(key, value)` pairs, with the quotes removed.
    pub attributes: Vec<(String, String)>,
    /// Byte offset in the line where the block starts (its opening `{`).
    pub start: usize,
    /// Byte offset in the line just past the block's closing `}`.
    pub end: usize,
}
73
74impl AttrList {
75 pub fn new() -> Self {
77 Self::default()
78 }
79
80 #[inline]
82 pub fn has_id(&self) -> bool {
83 self.id.is_some()
84 }
85
86 #[inline]
88 pub fn has_classes(&self) -> bool {
89 !self.classes.is_empty()
90 }
91
92 #[inline]
94 pub fn has_attributes(&self) -> bool {
95 !self.attributes.is_empty()
96 }
97
98 #[inline]
100 pub fn is_empty(&self) -> bool {
101 self.id.is_none() && self.classes.is_empty() && self.attributes.is_empty()
102 }
103}
104
105#[inline]
107pub fn contains_attr_list(line: &str) -> bool {
108 if !line.contains('{') {
110 return false;
111 }
112 ATTR_LIST_PATTERN.is_match(line)
113}
114
115#[inline]
127pub fn is_standalone_attr_list(line: &str) -> bool {
128 let trimmed = line.trim();
129 if !trimmed.starts_with('{') || !trimmed.ends_with('}') {
131 return false;
132 }
133 ATTR_LIST_PATTERN.is_match(trimmed)
135}
136
137#[inline]
183pub fn is_mkdocs_anchor_line(line: &str) -> bool {
184 let trimmed = line.trim();
185
186 if !trimmed.starts_with("[]()") {
188 return false;
189 }
190
191 let after_link = &trimmed[4..];
193
194 if !after_link.contains('{') {
196 return false;
197 }
198
199 let attr_start = after_link.trim_start();
201
202 if !attr_start.starts_with('{') {
204 return false;
205 }
206
207 let Some(close_idx) = attr_start.find('}') else {
209 return false;
210 };
211
212 if !attr_start[close_idx + 1..].trim().is_empty() {
214 return false;
215 }
216
217 let attr_content = &attr_start[..=close_idx];
219
220 if !ATTR_LIST_PATTERN.is_match(attr_content) {
222 return false;
223 }
224
225 let attrs = find_attr_lists(attr_content);
227 attrs.iter().any(|a| a.has_id() || a.has_classes())
228}
229
230pub fn find_attr_lists(line: &str) -> Vec<AttrList> {
232 if !line.contains('{') {
233 return Vec::new();
234 }
235
236 let mut results = Vec::new();
237
238 for m in ATTR_LIST_PATTERN.find_iter(line) {
239 let attr_text = m.as_str();
240 let mut attr_list = AttrList {
241 start: m.start(),
242 end: m.end(),
243 ..Default::default()
244 };
245
246 if let Some(caps) = CUSTOM_ID_PATTERN.captures(attr_text)
248 && let Some(id_match) = caps.get(1)
249 {
250 attr_list.id = Some(id_match.as_str().to_string());
251 }
252
253 for caps in CLASS_PATTERN.captures_iter(attr_text) {
255 if let Some(class_match) = caps.get(1) {
256 attr_list.classes.push(class_match.as_str().to_string());
257 }
258 }
259
260 for caps in KEY_VALUE_PATTERN.captures_iter(attr_text) {
262 if let Some(key) = caps.get(1)
263 && let Some(value) = caps.get(2)
264 {
265 attr_list
266 .attributes
267 .push((key.as_str().to_string(), value.as_str().to_string()));
268 }
269 }
270
271 if !attr_list.is_empty() {
272 results.push(attr_list);
273 }
274 }
275
276 results
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `check` returns `expected` for every string in `inputs`.
    fn assert_all(check: fn(&str) -> bool, expected: bool, inputs: &[&str]) {
        for input in inputs {
            assert_eq!(check(input), expected, "input: {input:?}");
        }
    }

    /// Parses `line`, asserts it holds exactly one attr_list and returns it.
    fn sole_attr_list(line: &str) -> AttrList {
        let mut found = find_attr_lists(line);
        assert_eq!(found.len(), 1, "input: {line:?}");
        found.swap_remove(0)
    }

    #[test]
    fn test_contains_attr_list() {
        // Valid attr_lists anywhere in the line.
        assert_all(
            contains_attr_list,
            true,
            &[
                "# Heading {#custom-id}",
                "# Heading {.my-class}",
                "# Heading {#id .class}",
                "Text {: #id}",
                "Link {target=\"_blank\"}",
            ],
        );

        // Lines without a valid attr_list.
        assert_all(
            contains_attr_list,
            false,
            &["# Regular heading", "Code with {braces}", "Empty {}", "Just text"],
        );
    }

    #[test]
    fn test_find_attr_lists_basic() {
        let list = sole_attr_list("# Heading {#custom-id}");
        assert_eq!(list.id, Some("custom-id".to_string()));
        assert!(list.classes.is_empty());
    }

    #[test]
    fn test_find_attr_lists_with_class() {
        let list = sole_attr_list("# Heading {.highlight}");
        assert!(list.id.is_none());
        assert_eq!(list.classes, vec!["highlight"]);
    }

    #[test]
    fn test_find_attr_lists_complex() {
        let list = sole_attr_list("# Heading {#my-id .class1 .class2 data-value=\"test\"}");
        assert_eq!(list.id, Some("my-id".to_string()));
        assert_eq!(list.classes, vec!["class1", "class2"]);
        assert_eq!(
            list.attributes,
            vec![("data-value".to_string(), "test".to_string())]
        );
    }

    #[test]
    fn test_find_attr_lists_kramdown_style() {
        let list = sole_attr_list("Paragraph {: #para-id .special }");
        assert_eq!(list.id, Some("para-id".to_string()));
        assert_eq!(list.classes, vec!["special"]);
    }

    #[test]
    fn test_multiple_attr_lists_same_line() {
        let found = find_attr_lists("[link]{#link-id} and [other]{#other-id}");
        assert_eq!(found.len(), 2);

        let ids: Vec<Option<String>> = found.into_iter().map(|list| list.id).collect();
        assert_eq!(
            ids,
            vec![Some("link-id".to_string()), Some("other-id".to_string())]
        );
    }

    #[test]
    fn test_attr_list_positions() {
        let line = "Text {#my-id} more";
        let list = sole_attr_list(line);
        assert_eq!(list.start, 5);
        assert_eq!(list.end, 13);
        assert_eq!(&line[list.start..list.end], "{#my-id}");
    }

    #[test]
    fn test_underscore_in_identifiers() {
        let list = sole_attr_list("# Heading {#my_custom_id .my_class}");
        assert_eq!(list.id, Some("my_custom_id".to_string()));
        assert_eq!(list.classes, vec!["my_class"]);
    }

    #[test]
    fn test_is_standalone_attr_list() {
        // Lines made up solely of an attr_list.
        assert_all(
            is_standalone_attr_list,
            true,
            &[
                "{ .class-name }",
                "{: .class-name }",
                "{#custom-id}",
                "{: #custom-id .class }",
                " { .indented } ",
            ],
        );

        // Attr_list accompanied by other content.
        assert_all(
            is_standalone_attr_list,
            false,
            &["Some text {#id}", "{#id} more text", "# Heading {#id}"],
        );

        // Braces without valid attr_list content.
        assert_all(is_standalone_attr_list, false, &["{ }", "{}", "{ random text }"]);

        // Empty and whitespace-only lines.
        assert_all(is_standalone_attr_list, false, &["", " "]);
    }

    #[test]
    fn test_is_mkdocs_anchor_line_basic() {
        assert_all(
            is_mkdocs_anchor_line,
            true,
            &[
                // IDs
                "[](){ #example }",
                "[](){#example}",
                "[](){ #my-anchor }",
                "[](){ #anchor_with_underscore }",
                // Classes
                "[](){ .highlight }",
                "[](){.my-class}",
                // Combinations
                "[](){ #anchor .class }",
                "[](){ .class #anchor }",
                "[](){ #id .class1 .class2 }",
            ],
        );
    }

    #[test]
    fn test_is_mkdocs_anchor_line_kramdown_style() {
        assert_all(
            is_mkdocs_anchor_line,
            true,
            &[
                "[](){: #anchor }",
                "[](){:#anchor}",
                "[](){: .class }",
                "[](){: #id .class }",
            ],
        );
    }

    #[test]
    fn test_is_mkdocs_anchor_line_whitespace_variations() {
        assert_all(
            is_mkdocs_anchor_line,
            true,
            &[
                // Whitespace around the whole line.
                " [](){ #example }",
                "[](){ #example } ",
                " [](){ #example } ",
                "\t[](){ #example }\t",
                // Whitespace between the link and the attr_list.
                "[]() { #example }",
                "[]()\t{ #example }",
                // No whitespace at all.
                "[](){#example}",
            ],
        );
    }

    #[test]
    fn test_is_mkdocs_anchor_line_not_anchor_lines() {
        assert_all(
            is_mkdocs_anchor_line,
            false,
            &[
                // Empty link without an attr_list.
                "[]()",
                // Empty attr_list.
                "[](){ }",
                "[](){}",
                // Links with a URL and/or text.
                "[](url)",
                "[text](url)",
                "[text](url){ #id }",
                // Trailing content.
                "[](){ #anchor } extra text",
                "[](){ #anchor } <!-- comment -->",
                // Leading content.
                "text [](){ #anchor }",
                "# Heading [](){ #anchor }",
                // Unrelated lines.
                "# Heading",
                "Some paragraph text",
                "{ #standalone-attr }",
                // Malformed variants.
                "[]{#anchor}",
                "[](#anchor)",
                "[](){ #anchor",
            ],
        );
    }

    #[test]
    fn test_is_mkdocs_anchor_line_edge_cases() {
        assert_all(is_mkdocs_anchor_line, false, &["", " ", "\t", "{}", "{ }"]);

        assert_all(
            is_mkdocs_anchor_line,
            true,
            &[
                // Key-value attribute alongside an ID.
                "[](){ #id data-value=\"test\" }",
                // More than one ID.
                "[](){ #first #second }",
            ],
        );
    }

    #[test]
    fn test_is_mkdocs_anchor_line_real_world_examples() {
        assert_all(
            is_mkdocs_anchor_line,
            true,
            &[
                "[](){ #installation }",
                "[](){ #getting-started }",
                "[](){ #api-reference }",
                "[](){ .annotate }",
                "[](){ #note .warning }",
            ],
        );
    }

    #[test]
    fn test_attr_list_pattern_digit_starting_ids() {
        assert_all(
            contains_attr_list,
            true,
            &[
                "{#3rd-party}",
                "{ #3rd-party }",
                "{#1}",
                "{#123-foo}",
                "{#1st-section}",
                "{#2nd_item}",
                // Combined with a class.
                "{#3rd-party .glossary}",
                // Kramdown-style prefix.
                "{: #3rd-party}",
            ],
        );
    }

    #[test]
    fn test_custom_id_extraction_digit_starting() {
        let cases = [
            ("{#3rd-party}", "3rd-party"),
            ("{#1}", "1"),
            ("{#123-foo}", "123-foo"),
            ("{#1st-section}", "1st-section"),
            ("{#2nd_item}", "2nd_item"),
        ];

        for (line, expected_id) in cases {
            let list = sole_attr_list(line);
            assert_eq!(list.id, Some(expected_id.to_string()));
        }
    }

    #[test]
    fn test_class_pattern_still_rejects_digit_starting() {
        let found = find_attr_lists("{.3invalid}");
        assert!(
            found.is_empty(),
            "Digit-starting class names should not be matched"
        );
    }

    #[test]
    fn test_mkdocs_anchor_line_digit_starting_id() {
        assert_all(
            is_mkdocs_anchor_line,
            true,
            &["[](){ #3rd-party }", "[](){ #1 }", "[](){ #123-section }"],
        );
    }
}